diff --git a/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py b/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py index c26634f27..efea88746 100644 --- a/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py +++ b/modules/bibauthorid/lib/bibauthorid_personid_tables_utils.py @@ -1,1663 +1,1663 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
""" bibauthorid_personid_tables_utils Bibauthorid's personid related DB handler """ import sys import time import threading import bibauthorid_config as bconfig import bibauthorid_structs as dat from search_engine import get_record from dbquery import run_sql from dbquery import OperationalError, ProgrammingError from bibauthorid_utils import split_name_parts, create_normalized_name from bibauthorid_utils import clean_name_string from bibauthorid_authorname_utils import compare_names from threading import Thread from access_control_engine import acc_authorize_action from webuser import collect_user_info def update_personID_table_from_paper(papers_list=[]): ''' Updates the personID table removing the bibrec/bibrefs couples no longer existing (after a paper has been updated (name changed)) @param: list of papers to consider for the update (bibrecs) (('1'),) ''' for paper in papers_list: fullbibrefs100 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec=%s", (paper[0],)) fullbibrefs700 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec=%s", (paper[0],)) fullbibrefs100str = '( ' for i in fullbibrefs100: fullbibrefs100str += " '" + str(i[0]) + "'," fullbibrefs100str = fullbibrefs100str[0:len(fullbibrefs100str) - 1] + ' )' fullbibrefs700str = '( ' for i in fullbibrefs700: fullbibrefs700str += " '" + str(i[0]) + "'," fullbibrefs700str = fullbibrefs700str[0:len(fullbibrefs700str) - 1] + ' )' if len(fullbibrefs100) >= 1: bibrefs100 = run_sql("select id from bib10x where tag='100__a' and id in %s" % fullbibrefs100str) else: bibrefs100 = [] if len(fullbibrefs700) >= 1: bibrefs700 = run_sql("select id from bib70x where tag='700__a' and id in %s" % fullbibrefs700str) else: bibrefs700 = [] bibrecreflist = [] for i in bibrefs100: bibrecreflist.append('100:' + str(i[0]) + ',' + str(paper[0])) for i in bibrefs700: bibrecreflist.append('700:' + str(i[0]) + ',' + str(paper[0])) if bconfig.TABLES_UTILS_DEBUG: print "update_personID_table_from_paper: searching 
for pids owning " + str(paper[0]) pid_rows = run_sql("select * from aidPERSONID where tag='paper' and data like %s", ('%,' + str(paper[0]),)) #finally, if a bibrec/ref pair is in the authornames table but not in this list that name of that paper #is no longer existing and must be removed from the table. The new one will be addedd by the #update procedure in future; this entry will be risky becouse the garbage collector may #decide to kill the bibref in the bibX0x table for row in pid_rows: if row[3] not in bibrecreflist: other_bibrefs = [b[3] for b in pid_rows if b[1] == row[1] and b[3] != row[3]] if len(other_bibrefs) == 1: if bconfig.TABLES_UTILS_DEBUG: print "update_personID_table_from_paper: deleting " + str(row) + ' and updating ' + str(other_bibrefs[0]) #we have one and only one sobstitute, we can switch them! run_sql("delete from aidPERSONID where id = %s", (str(row[0]),)) run_sql("update aidPERSONID set flag=%s,lcul=%s where id=%s", (str(row[4]), str(row[5]), str(other_bibrefs[0][0]))) else: if bconfig.TABLES_UTILS_DEBUG: print "update_personID_table_from_paper: deleting " + str(row) run_sql("delete from aidPERSONID where id = %s", (str(row[0]),)) else: if bconfig.TABLES_UTILS_DEBUG: print "update_personID_table_from_paper: not touching " + str(row) def personid_perform_cleanup(): ''' Performs a consistency cleanup on the data in personID tables. It is usually not needed to have papers manually assigned to a personID to be even rejected from a different personID. This method thus takes care of eliminating such a redudancy in the table where it happens. It's not done during the update process for speed reasons. 
''' #consistency check: #papers which have been assigned by users should appear in only one place #This will no longer be needed if the update_from_algorithm will be modified #to take that into account, now it is not for performance reasons run_sql("delete from aidPERSONID where tag='paper' and flag <='-1' and \ data in (select data from aidPERSONID where tag='paper' and flag='2')") def confirm_papers_to_person(pid, papers, user_level): ''' Confirms the relationship between pid and paper, as from user input. @param pid: id of the person @type pid: ('2',) @param papers: list of papers to confirm @type papers: (('100:7531,9024',),) ''' #expects a pid ('2',) #and a lst of papers (('100:7531,9024',),) class names_gatherer(Thread): def __init__ (self, pid): Thread.__init__(self) self.pid = pid def run(self): update_personID_names_string_set(self.pid) updated_pids = [] for p in papers: old_owners = run_sql("select personid from aidPERSONID where tag=%s and data=%s", ('paper', str(p[0]),)) if len(old_owners) > 0: for owner in old_owners: updated_pids.append((str(owner[0]),)) run_sql("delete from aidPERSONID where tag=%s and data=%s", ('paper', str(p[0]),)) run_sql("insert into aidPERSONID (PersonID, tag, data, flag, lcul) values (%s,'paper',%s,'2', %s)", (str(pid[0]), str(p[0]), user_level)) update_personID_names_string_set((pid,)) upd_thread = names_gatherer(tuple(updated_pids)) upd_thread.start() def reject_papers_from_person(pid, papers, user_level=0): ''' Confirms the negative relationship between pid and paper, as from user input. 
@param pid: id of the person @type pid: ('2',) @param papers: list of papers to confirm @type papers: (('100:7531,9024',),) ''' #expects a pid ('2',) #and a lst of papers (('100:7531,9024',),) #check if already assigned by user and skip those ones for p in papers: run_sql("update aidPERSONID set flag=%s,lcul=%s where PersonID=%s and data=%s", ('-2', user_level, str(pid[0]), str(p[0]))) update_personID_names_string_set((pid,)) def reset_papers_flag(pid, papers): ''' Resets the flag associated to the papers to '0' @param papers: list of papers to confirm @type papers: (('100:7531,9024',),) ''' for p in papers: run_sql("update aidPERSONID set flag=%s,lcul='0' where tag=%s and data=%s", ('0', 'paper', str(p[0]))) update_personID_names_string_set((pid,)) def get_papers_status(papers): ''' Gets the personID and flag assiciated to papers @param papers: list of papers @type papers: (('100:7531,9024',),) @return: (('data','personID','flag',),) @rtype: tuple of tuples ''' #lst of papers (('100:7531,9024',),) #for each paper gives: personid, assignment status papersstr = '( ' for p in papers: papersstr += '\'' + str(p[0]) + '\',' papersstr = papersstr[0:len(papersstr) - 1] + ' )' if len(papers) >= 1: return run_sql("select data,PersonID,flag from aidPERSONID where tag=%s and data in " + papersstr, ('paper',)) else: return [] def get_person_papers(pid, flag, show_author_name=False, show_title=False): ''' Returns all the paper associated to a person with a flag greater or equal than the given one. Eventually returns even author name and title associated to the papers. @param pid: person id @type pid: ('2',) @param flag: numerical flag, the convention is documented with the database table creation script @type papers: integer @param show_author_name: Also return authorname in dict? @type show_author_name: Boolean @param show_title: Also return title in dict? 
@type show_title: Boolean @return: [{'data': "", 'flag': "", 'author_name': "", 'title': ""}] author_name and title will be returned depending on the params @rtype: list of dicts ''' #expects a pid ('2',) #and a flag 0 paperslist = [] try: flag = int(flag) except ValueError: return paperslist docs = run_sql("SELECT data,flag FROM aidPERSONID where personid = %s" " and tag = %s and flag >= %s", (pid[0], 'paper', flag)) for doc in docs: listdict = {} if show_title: title = "No title on paper..." try: rec_id = int(doc[0].split(',')[1]) title = get_record(rec_id)['245'][0][0][0][1] except (IndexError, KeyError, ValueError): title = "Problem encountered while retrieving document title" listdict["title"] = title dsplit = doc[0].split(',') tnum = "70" if str(dsplit[0].split(':')[0]) == "100": tnum = "10" sqlstr = ("SELECT value FROM bib%sx WHERE id = " % (tnum)) + '%s' authorname = run_sql(sqlstr, (dsplit[0].split(':')[1],)) try: authorname = authorname[0][0] if show_author_name: listdict["authorname"] = authorname listdict["data"] = doc[0] listdict["flag"] = doc[1] paperslist.append(listdict) except IndexError: #The paper has been modified and this bibref is no longer there #@TODO: this must call bibsched to update_personid_table_from_paper continue return paperslist def add_person_paper_needs_manual_review(pid, bibrec): ''' Adds to a person a paper which needs manual review before bibref assignment @param pid: personid, int @param bibrec: the bibrec, int ''' set_person_data(pid, 'paper_needs_bibref_manual_confirm', bibrec) def get_person_papers_to_be_manually_reviewed(pid): ''' Returns the set of papers awaiting for manual review for a person for bibref assignment @param pid: the personid, int ''' return get_person_data(pid, 'paper_needs_bibref_manual_confirm') def del_person_papers_needs_manual_review(pid, bibrec): ''' Deletes from the set of papers awaiting for manual review for a person @param pid: personid, int @param bibrec: the bibrec, int ''' del_person_data(pid, 
'paper_needs_bibref_manual_confirm', bibrec) def get_person_data(person_id, tag=None): ''' Returns all the records associated to a person. If tag != None only rows for the selected tag will be returned. @param person_id: id of the person to read the attribute from @type person_id: int @param tag: the tag to read. Optional. Default: None @type tag: string @return: the data associated with a virtual author @rtype: tuple of tuples ''' rows = [] if tag: rows = run_sql("SELECT tag, data FROM aidPERSONID " "WHERE personid = %s AND tag = %s", (person_id, tag)) else: rows = run_sql("SELECT tag, data FROM aidPERSONID " "WHERE personid = %s", (person_id,)) return rows def set_person_data(person_id, tag, value, user_level=0): ''' Change the value associated to the given tag for a certain person. @param person_id: ID of the person @type person_id: int @param tag: tag to be updated @type tag: string @param value: value to be written for the tag @type value: string ''' current_tag_value = run_sql("SELECT data FROM aidPERSONID " "WHERE personid = %s AND tag = %s AND " "data = %s", (person_id, tag, value)) if len(current_tag_value) > 0: run_sql("UPDATE aidPERSONID SET tag = %s, data = %s WHERE " "personid = %s AND tag = %s AND lcul = %s", (tag, value, person_id, tag, user_level)) else: run_sql("INSERT INTO aidPERSONID (`personid`, `tag`, `data`, `flag`, `lcul`) " "VALUES (%s, %s, %s, %s, %s);", (person_id, tag, value, '0', user_level)) def del_person_data(person_id, tag, value=None): ''' Change the value associated to the given tag for a certain person. 
@param person_id: ID of the person @type person_id: int @param tag: tag to be updated @type tag: string @param value: value to be written for the tag @type value: string ''' if not value: run_sql("delete from aidPERSONID where personid=%s and tag=%s", (person_id, tag)) else: run_sql("delete from aidPERSONID where personid=%s and tag=%s and data=%s", (person_id, tag, value)) def get_person_names_count(pid): ''' Returns the set of name strings and count associated to a person id @param pid: ID of the person @type pid: ('2',) @param value: value to be written for the tag @type value: string ''' return run_sql("select data,flag from aidPERSONID where PersonID=%s and tag=%s", (str(pid[0]), 'gathered_name',)) def get_person_db_names_count(pid): ''' Returns the set of name strings and count associated to a person id. The name strings are as found in the database. @param pid: ID of the person @type pid: ('2',) @param value: value to be written for the tag @type value: string ''' norm_names_count = run_sql("select data,flag from aidPERSONID where " "PersonID=%s and tag='gathered_name'", (str(pid[0]),)) norm_names_count_dict = {} db_names_count_dict = {} db_names = get_person_names_set(pid) return_list = [] for name, count in norm_names_count: norm_names_count_dict[name] = count names_to_join = [] for name in norm_names_count_dict: names_to_join.append([[name], []]) for db_name in db_names: try: ndb_name = create_normalized_name(split_name_parts(db_name[0])) db_names_count_dict[db_name[0]] = norm_names_count_dict[ndb_name] for i in names_to_join: if ndb_name in i[0]: i[1].append(db_name[0]) except (KeyError): db_names_count_dict[db_name[0]] = 1 for nl in names_to_join: name_string = '' for n in nl[1]: name_string += '"' + str(n) + '" ' if len(nl[1]) < 1: name_string = '"' + str(nl[0][0]) + '" ' return_list.append((name_string, norm_names_count_dict[nl[0][0]])) # for name, count in db_names_count_dict.iteritems(): # return_list.append((name, count)) # return_list = 
sorted(return_list, key=lambda k: k[0], reverse=False) return tuple(return_list) def get_person_names_set(pid): ''' Returns the set of name strings associated to a person id @param pid: ID of the person @type pid: ('2',) @param value: value to be written for the tag @type value: string ''' #expects a pid ('2',) docs = run_sql("SELECT `data` FROM `aidPERSONID` where PersonID=%s and tag=%s and flag>=%s", (str(pid[0]), 'paper', '-1',)) authornames = set() for doc in docs: dsplit = doc[0].split(',') tnum = "70" if str(dsplit[0].split(':')[0]) == "100": tnum = "10" sqlstr = "SELECT value FROM bib%sx WHERE id = " % tnum + "%s" authorname = run_sql(sqlstr, (dsplit[0].split(':')[1],)) authornames.add(authorname[0]) return list(authornames) def find_personIDs_by_name_string(namestring): ''' Search engine to find persons matching the given string @param: string name, 'surname, names I.' @type: string @return: pid list of lists [pid,[[name string, occur count, compatibility]]] The matching is done on the surname first, and names if present. An ordered list (per compatibility) of pids and found names is returned. ''' namestring_parts = split_name_parts(namestring) # The following lines create the regexp used in the query. surname = clean_name_string(namestring_parts[0], # replacement=".{0,3}", replacement="%", keep_whitespace=False, trim_whitespaces=True) # if not surname.startswith(".{0,3}"): # surname = "^['`-]*%s*" % (surname) surname = surname + ',%' print surname #The regexp is not used anymore because it's not finding all the strings it should have ; #the 'like' statement is slower, the regexp will be fixed asap # matching_pids_names_tuple = run_sql('select personid, data, flag ' # 'from aidPERSONID as a where ' # 'tag=\'gathered_name\' and ' # 'data REGEXP "%s"' # % (surname)) #@fixme: find_personIDs_by_name_string: the search can be done on authornames table and match the bibrefs, probably faster. 
matching_pids_names_tuple = run_sql("select personid, data, flag from aidPERSONID as a where " "tag=\'gathered_name\' and data like %s", (surname,)) if len(matching_pids_names_tuple) == 0 and len(surname) >= 6: surname = '%' + surname[0:len(surname) - 2] + '%' matching_pids_names_tuple = run_sql("select personid, data, flag from aidPERSONID as a where " "tag=\'gathered_name\' and data like %s", (surname,)) matching_pids = [] for name in matching_pids_names_tuple: comparison = compare_names(namestring, name[1]) matching_pids.append([name[0], name[1], name[2], comparison]) # matching_pids = sorted(matching_pids, key=lambda k: k[3], reverse=True) persons = {} for n in matching_pids: - if n[3] >= 0.0: + if n[3] >= 0.8: if n[0] not in persons: persons[n[0]] = sorted([[p[1], p[2], p[3]] for p in matching_pids if p[0] == n[0]], key=lambda k: k[2], reverse=True) porderedlist = [] for i in persons.iteritems(): porderedlist.append([i[0], i[1]]) porderedlist = sorted(porderedlist, key=lambda k: k[1][0][0], reverse=False) porderedlist = sorted(porderedlist, key=lambda k: k[1][0][2], reverse=True) return porderedlist def update_personID_names_string_set(PIDlist=[]): ''' Updates the personID table with the names gathered from documents @param: list of pids to consider, if omitted performs an update on the entire db @type: tuple of tuples Gets all the names associated to the bibref/bibrec couples of the person and builds a set of names, counting the occurrencies. The values are store in the gathered_name/flag fields of each person. The gathering of names is an expensive operation for the database (many joins), so the operation is threaded so to have as many parallell queries as possible. 
''' if len(PIDlist) == 0: PIDlist = run_sql('SELECT DISTINCT `personid` FROM `aidPERSONID`')# LIMIT 1 , 15') class names_gatherer(Thread): def __init__ (self, pid): Thread.__init__(self) self.pid = pid self.pstr = '' def run(self): self.namesdict = dict() self.person_papers = run_sql("select data from `aidPERSONID` where tag=\'paper\' and " " flag >= \'-1\' and PersonID=%s", (str(self.pid[0]),)) for p in self.person_papers: self.pname = run_sql("select Name from aidAUTHORNAMES where id = " "(select Name_id from aidAUTHORNAMESBIBREFS where bibref = %s)", (str(p[0].split(',')[0]),)) if self.pname[0][0] not in self.namesdict: self.namesdict[self.pname[0][0]] = 1 else: self.namesdict[self.pname[0][0]] += 1 self.current_namesdict = dict(run_sql("select data,flag from aidPERSONID where personID=%s " "and tag=\'gathered_name\'", (str(self.pid[0]),))) self.needs_update = False if self.current_namesdict != self.namesdict: self.needs_update = True else: for i in self.namesdict.iteritems(): if i[1] != self.current_namesdict[i[0]]: self.needs_update = True if bconfig.TABLES_UTILS_DEBUG: pass # sys.stdout.write(str(self.pid) + str(i[1]) + ' differs from ' + str(self.current_namesdict[i[0]])) # sys.stdout.flush() if self.needs_update: if bconfig.TABLES_UTILS_DEBUG: pass # sys.stdout.write(str(self.pid) + ' updating!') # sys.stdout.flush() run_sql("delete from `aidPERSONID` where PersonID=%s and tag=%s", (str(self.pid[0]), 'gathered_name')) for name in self.namesdict: # self.pstr += ' ' + str(self.pid[0]) + ' ...processing: ' + str(name) + ' ' + str(self.namesdict[name]) run_sql('insert into aidPERSONID (PersonID, tag, data, flag) values (' + str(self.pid[0]) + ',\'gathered_name\',\"' + str(name) + '\",\"' + str(self.namesdict[name]) + '\")') # else: # sys.stdout.write(str(self.pid) + ' not updating!') # sys.stdout.flush() # sys.stdout.write(self.pstr + '\n') # sys.stdout.flush() tgath = [] for pid in PIDlist: current = names_gatherer(pid) tgath.append(current) current.start() if 
bconfig.TABLES_UTILS_DEBUG: sys.stdout.write(str(pid) + '.\n') sys.stdout.flush() while threading.activeCount() > bconfig.PERSONID_SQL_MAX_THREADS: time.sleep(0.02) for t in tgath: t.join() def update_personID_from_algorithm(RAlist=[]): ''' Updates the personID table with the results of the algorithm, taking into account user inputs @param: list of realauthors to consider, if omitted performs an update on the entire db @type: tuple of tuples This is the core of the matching between the bibauthorid world and the personid world. For each RA of the list, tries to find the person it should be (in an ideal world there is 100% matching in the list of papers, and the association is trivial). In the real world an RA might be wrongly carrying papers of more then one person (or a person might have papers of more then one RAs) so the matching must be done on a best-effort basis: -find the most compatible person -if it's compatible enough, merge the person papers with the ra papers (after a backtracking to find all the other RAs which the person might 'contain') -if nobody is compatible enough create a new person with RA papers Given the fuzzy nature of both the computation of RAs and the matching with persons, it has been decided to stick to the person all and only the papers which are carried by the RAs over a certain threshold. 
''' def get_bibreclist(currentRA): #[['700:157610,453095', '1.0']] VAlist = run_sql("SELECT `virtualauthorID`,`p` FROM `aidREALAUTHORS` WHERE `realauthorID`=%s", (str(currentRA[0]),)) bibreclist = [] bibrecstruct = [] class get_va_bibreclist(Thread): def __init__ (self, va): Thread.__init__(self) self.va = va self.bibreclist = [] def run(self): if bconfig.TABLES_UTILS_DEBUG: pass #print ' --debug: thread spawn for bibreclist of va: ' + str(self.va) bibrec = dict(run_sql("SELECT `tag`,`value` FROM `aidVIRTUALAUTHORSDATA` WHERE " "virtualauthorID=%s and (tag=%s or tag=%s)", (str(self.va[0]), 'bibrec_id', 'orig_authorname_id'))) if (not bibrec.has_key("orig_authorname_id")) or (not bibrec.has_key("bibrec_id")): if bconfig.TABLES_UTILS_DEBUG: print ("WARNING: VA %s holds no data." % self.va[0]) return bibreflist = run_sql("SELECT `bibrefs` FROM `aidAUTHORNAMES` WHERE `id`=%s", (str(bibrec['orig_authorname_id']),)) bibreflist = bibreflist[0][0].split(',') bibref100string = '(' bibref700string = '(' for br in bibreflist: if br.split(':')[0] == '100': bibref100string += '\'' + br.split(':')[1] + '\',' else: bibref700string += '\'' + br.split(':')[1] + '\',' if bibref100string[len(bibref100string) - 1] == ',': bibref100string = bibref100string[0:len(bibref100string) - 1] + ')' else: bibref100string = '' if bibref700string[len(bibref700string) - 1] == ',': bibref700string = bibref700string[0:len(bibref700string) - 1] + ')' else: bibref700string = '' if bibref100string: bibrec100list = run_sql("SELECT `id_bibxxx` FROM `bibrec_bib10x` WHERE `id_bibrec`=%s" " and `id_bibxxx` in " + bibref100string, (str(bibrec['bibrec_id']),)) else: bibrec100list = [] if bibref700string: bibrec700list = run_sql("SELECT `id_bibxxx` FROM `bibrec_bib70x` WHERE `id_bibrec`=%s" " and `id_bibxxx` in" + bibref700string, (str(bibrec['bibrec_id']),)) else: bibrec700list = [] for br in bibreflist: if (long(br.split(':')[1]),) in bibrec100list: if br not in self.bibreclist: self.bibreclist.append([br + 
',' + bibrec['bibrec_id'] , str(self.va[1])]) break elif (long(br.split(':')[1]),) in bibrec700list: if br not in self.bibreclist: self.bibreclist.append([br + ',' + bibrec['bibrec_id'] , str(self.va[1])]) break tbibreclist = [] if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: get_bibreclist threads: ' for va in VAlist: current = get_va_bibreclist(va) tbibreclist.append(current) if bconfig.TABLES_UTILS_DEBUG: sys.stdout.write('.') sys.stdout.flush() current.start() while threading.activeCount() > bconfig.PERSONID_SQL_MAX_THREADS: time.sleep(0.02) for t in tbibreclist: t.join() for b in t.bibreclist: if b not in bibreclist: bibreclist.append(b) if bconfig.TABLES_UTILS_DEBUG: print '\nupdate_personID_from_algorithm: get_bibreclist ---------------- Considering RA: ' + str(currentRA) return bibreclist def create_new_person(bibreclist): #creating new personid PID = max(run_sql('SELECT MAX(PersonID) FROM `aidPERSONID`')[0][0], -1) + 1 SQLquery = '' for br in bibreclist: flag = 0 if br[1] >= bconfig.PERSONID_CNP_FLAG_1: flag = 1 elif br[1] < bconfig.PERSONID_CNP_FLAG_MINUS1: flag = -1 SQLquery += ('insert into `aidPERSONID` (PersonID, tag, data, flag) values (' + str(PID) + ', \'paper\',%s,\'' + str(flag) + '\');') % ('\'' + br[0] + '\'') if SQLquery: run_sql(SQLquery) update_personID_names_string_set(((str(PID),),)) if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: create_new_person ---------------- ' + str(PID) def get_person_ra(person_papers): inverse_ra_list = [] papers_vas = [] class get_va_from_paper(Thread): def __init__ (self, paper): Thread.__init__(self) self.paper = paper self.vas = [] def run(self): self.authnameid = run_sql("select Name_id from aidAUTHORNAMESBIBREFS where bibref=%s", (str(self.paper[0].split(',')[0]),)) self.va = run_sql( "select a.virtualauthorID from aidVIRTUALAUTHORSDATA as a inner join " "aidVIRTUALAUTHORSDATA as b on a.virtualauthorID=b.virtualauthorID " "where ((a.tag=%s and a.value=%s) and 
(b.tag=%s and b.value=%s))", ('bibrec_id', str(self.paper[0].split(',')[1]), 'orig_authorname_id', str(self.authnameid[0][0]))) #This is left here for benchmarking, it is still not clear which approach is the fastest #self.va = run_sql('select virtualauthorID from `aidVIRTUALAUTHORSDATA` where ( virtualauthorID in (' # + ('select virtualauthorID from `aidVIRTUALAUTHORSDATA` where tag=\'bibrec_id\' and value=\'%s\'') # % (str(self.paper[0].split(',')[1])) # + ')) and ((tag, value) = (\'orig_authorname_id\', \'' # + str(authnameid[0][0]) + '\'))') for i in self.va: self.vas.append(i[0]) tvapaper = [] if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: get_va_from_paper threads: ' for paper in person_papers: current = get_va_from_paper(paper) tvapaper.append(current) if bconfig.TABLES_UTILS_DEBUG: sys.stdout.write('.') sys.stdout.flush() current.start() while threading.activeCount() > bconfig.PERSONID_SQL_MAX_THREADS: time.sleep(0.02) for t in tvapaper: t.join() for b in t.vas: if b not in papers_vas: papers_vas.append(b) papers_vas_string = '( ' for i in papers_vas: papers_vas_string += '\'' + str(i) + '\',' papers_vas_string = papers_vas_string[0:len(papers_vas_string) - 1] + ' )' if len(papers_vas) >= 1: inverse_ra_list.append(run_sql("select distinct `realauthorID` " " from `aidREALAUTHORS` where virtualauthorID in " + papers_vas_string)) else: inverse_ra_list = [] if bconfig.TABLES_UTILS_DEBUG: print '\nupdate_personID_from_algorithm: get_person_ra ---------------- on ' + str(person_papers) return inverse_ra_list def merge_update_person_with_ra(pids, person_paper_list, currentRA, bibreclist): ras = get_person_ra(person_paper_list) # bibrecslists = [] bibrecset = set() person_rejected_papers = run_sql("select data from `aidPERSONID` where " ' tag=%s and flag=%s and PersonID=%s', ('paper', '-2', str(pids[0]))) person_confirmed_papers = run_sql("select data from `aidPERSONID` where " ' tag=%s and flag=%s and PersonID=%s', ('paper', '2', 
str(pids[0]))) person_rejected_papers_set = set() for paper in person_rejected_papers: person_rejected_papers_set.add(paper[0]) person_confirmed_papers_set = set() for paper in person_confirmed_papers: person_confirmed_papers_set.add(paper[0]) for ra in ras: list = get_bibreclist(ra[0]) # bibrecslists.append(list) for doc in list: if doc[1] >= bconfig.PERSONID_MIN_P_FROM_BCTKD_RA: bibrecset.add(doc[0]) for doc in bibreclist: if doc[1] >= bconfig.PERSONID_MIN_P_FROM_NEW_RA: bibrecset.add(doc[0]) person_paper_set = set() for paper in person_paper_list: person_paper_set.add(paper[0]) p_to_add = bibrecset.difference(person_paper_set) p_to_add = p_to_add.difference(person_rejected_papers_set) p_to_add = p_to_add.difference(person_confirmed_papers_set) # we might consider the case in which the algorithm is clustering two papers which are # manually assigned to different persons. That would mean adding a potentially really slow query # and once tthe algorithm will be able to take into consideration user input logs that should never happen # so this will be left to be done once we will see if it is really necessary to slow down everything # when the algorithm is clustering nicely this shouldn't happen anyway p_to_remove = person_paper_set.difference(bibrecset) p_to_remove = p_to_remove.difference(person_confirmed_papers_set) p_to_remove = p_to_remove.difference(person_rejected_papers_set) SQLquery = '' for br in p_to_add: SQLquery += ('insert into `aidPERSONID` (PersonID, tag, data, flag) values (' + str(pids[0]) + ', \'paper\',%s,\'0\');') % ('\'' + br + '\'') if SQLquery: run_sql(SQLquery) SQLquery = '' for br in p_to_remove: SQLquery += ('delete from `aidPERSONID` where PersonID=\'' + str(pids[0]) + '\' and tag=\'paper\' and data=\'' + str(br) + '\';') if SQLquery: run_sql(SQLquery) update_personID_names_string_set((pids,)) if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: Merging ----------------' + str(pids) + ' with realauthor ' + str(currentRA) + 
' and found ras ' + str(ras) # print 'adding ' + str(p_to_add) # print 'removing ' + str(p_to_remove) if len(RAlist) == 0: RAlist = run_sql('SELECT DISTINCT `realauthorID` FROM `aidREALAUTHORS`')# LIMIT 1 , 15') for currentRA in RAlist: print '---|||||--- considering RA ' + str(currentRA) #bibreclist is the list of bibrefs associated with a RA bibreclist = get_bibreclist(currentRA) if not bibreclist: if bconfig.TABLES_UTILS_DEBUG: print "update_personID_from_algorithm: Skipping RA. Got no data from VA." continue bibrecsqlstring = '( ' for i in bibreclist: bibrecsqlstring += '\'' + str(i[0]) + '\',' bibrecsqlstring = bibrecsqlstring[0:(len(bibrecsqlstring) - 1)] + ' )' sqlstr = "SELECT DISTINCT PersonID FROM `aidPERSONID` WHERE tag=%s and `flag` >= %s and `data` in " + bibrecsqlstring if len(bibreclist) >= 1: pids = run_sql(sqlstr, ('paper', '0')) else: pids = [] if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: Possible PIDS: ' + str(pids) if len(pids) < 1: create_new_person(bibreclist) else: #collect all the bibrefs #find all RA involved #decide which ones are really connected (>threshold) #merge them in the person found person_paper_list = [] for pid in pids: person_papers = run_sql("select data from `aidPERSONID` where tag=%s and " "flag >= %s and PersonID=%s", ('paper', str(bconfig.PERSONID_UPFA_PPLMF), str(pid[0]))) person_paper_list.append(person_papers) docn = len(bibreclist) bibrectdict = dict(bibreclist) compatibility_list = [] compatible_papers_count = [] for pid in person_paper_list: sum = 0.0 p_c = 0.0 for doc in pid: try: sum += float(bibrectdict[doc[0]]) p_c += 1 except: pass #print 'noindex exception!' 
compatibility_list.append(sum / docn) compatible_papers_count.append(p_c / docn) if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: Compatibility list: ' + str(compatibility_list) if max(compatibility_list) < bconfig.PERSONID_MAX_COMP_LIST_MIN_TRSH: if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: Max compatibility list < than 0.5!!!' pidindex = compatible_papers_count.index(max(compatible_papers_count)) if compatible_papers_count[pidindex] >= bconfig.PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N: merge_update_person_with_ra(pids[pidindex], person_paper_list[pidindex], currentRA, bibreclist) else: create_new_person(bibreclist) else: maxcount = compatibility_list.count(max(compatibility_list)) if maxcount == 1: #merge pidindex = compatibility_list.index(max(compatibility_list)) merge_update_person_with_ra(pids[pidindex], person_paper_list[pidindex], currentRA, bibreclist) elif maxcount > 1: if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: !!!!!!!!!!!!! Passing by, no maximum in compatibility list??' #resolve merge else: if bconfig.TABLES_UTILS_DEBUG: print 'update_personID_from_algorithm: !!!!!!!!!!!!! Error: no one is compatible!!? not doing anything...' def export_personid_to_spiresid_validation(filename='/tmp/inspirepid', filename_oids='/tmp/inspirepidoids'): ''' WARNING: still to be consolidated, but output is usable WARNING^2: S L O W . 
@fixme: export_personid_to_spiresid_validation: use get_record, might be much faster ''' fp = open(filename, 'w') fp2 = open(filename_oids, 'w') fp.write('Personid->inspireid match:\n\n') fp2.write('Personid->inspireid match: INSPERE IDS only \n\n') pids = run_sql('SELECT personid FROM `aidPERSONID` WHERE 1 group by personid') for pid in pids: print 'considering:' + str(pid) fp.write('Considering pid' + str(pid) + '\n') fp2.write('Considering pid' + str(pid) + '\n') papers = run_sql('select data from aidPERSONID where tag=\'paper\' and ' 'personid=\'' + str(pid[0]) + '\' ') parray = [] for paper in papers: if paper[0].split(':')[0] == '700': print ' -' + str(paper) fields = run_sql('select id,value from bib70x where ' '(tag=\'700__a\') and ' 'id=\'' + str(paper[0].split(',')[0].split(':')[1]) + '\'') insid = run_sql('select id,value from bib70x where tag=\'700__i\' ' 'and (id) in ' '(select a.id_bibxxx from bibrec_bib70x as a inner join ' 'bibrec_bib70x as b using(id_bibrec)' 'where a.field_number = b.field_number and ' 'b.id_bibxxx = \'' + str(paper[0].split(',')[0].split(':')[1]) + '\' and b.id_bibrec = \'' + str(paper[0].split(',')[1]) + '\')') parray.append([fields, insid, paper]) for p in parray: fp.write(' - ' + str(p[0]) + ' ' + str(p[1]) + ' from ' + str(p[2]) + '\n') if len(p[1]) >= 1: fp2.write(' - ' + str(p[0]) + ' ' + str(p[1]) + ' from ' + str(p[2]) + '\n') fp.close() fp2.close() def export_spiresid_to_personid_validation(filename='/tmp/inspireid'): '''WARNING: still to be consolidated''' fp = open(filename, 'w') bibrefs = run_sql('SELECT id,tag,value,count(value) FROM `bib70x` WHERE ' '`tag` LIKE \'700__i\' group by value order by value') realbibrefs = [] fp.write('Inspireid->personid match:\n\n') for i in bibrefs: print 'considering:' + str(i) # bibref = run_sql('select id,value from bib70x where tag=\'700__a\' ' # 'and (id) in (select id_bibxxx from bibrec_bib70x where ' # '(id_bibrec,field_number) in ' # '(select id_bibrec,field_number from 
bibrec_bib70x ' # 'where id_bibxxx = \''+str(i[0])+'\'))') bibref = run_sql('select id,value from bib70x where tag=\'700__a\' ' 'and (id) in ' '(select a.id_bibxxx from bibrec_bib70x as a inner join ' 'bibrec_bib70x as b using(id_bibrec)' 'where a.field_number = b.field_number and ' 'b.id_bibxxx = \'' + str(i[0]) + '\')') print ' found ' + str(bibref) for bib in bibref: fp.write(' -\n') pids = run_sql('select personid from aidPERSONID where tag=\'paper\'' ' and data like \'700:%,' + str(bib[0]) + '\'') fp.write(str(i) + ':\n') for pid in pids: names = run_sql('select data,flag from aidPERSONID where' ' tag=\'gathered_name\'' ' and personID=\'' + str(pid[0]) + '\'') fp.write(' -' + str(pid) + ': ' + str(names) + '\n ') fp.write('\n') fp.close() ''' User log case usages and contents reference. Table structure: id trans_id timestamp userinfo personID action tag value comment int int time char255 int char50 char50 char200 text Operations on papers: * Assignment: - assign bibrec,bibref to personid id trans_id timestamp userinfo personID action tag value comment xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx assign paper x00:xxxx,xxxx NULL xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx assign paper x00:xxxx,xxxx 'telephone request of the author bla bla bla' xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx assign paper x00:xxxx,xxxx 'first manual assignment, moved from pid: xxxx' * Rejection: - reject bibrec,bibref from personid id trans_id timestamp userinfo personID action tag value comment xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reject paper x00:xxxx,xxxx NULL xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reject paper x00:xxxx,xxxx 'telephone request of the author bla bla bla' xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reject paper x00:xxxx,xxxx 'manual inspection of the paper' * Reset: - Reset bibrec,bibref status (don't know who really is the author) id trans_id timestamp userinfo personID action tag value comment xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reset paper x00:xxxx,xxxx NULL 
xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reset paper x00:xxxx,xxxx 'discovered error' xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx reset paper x00:xxxx,xxxx 'not enough information on the paper' Action,tag allowed couples: * assign,paper * reject,paper * reset,paper Operations on person ids: * Add: - assign info to personid id trans_id timestamp userinfo personID action tag value comment xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx data_add inspire_uid uid_inspire_xxx NULL xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx data_add email_addr xxx@xxx.xxx NULL xxx xxxxx xxxx-xx-xx uid_inspire_xxx xxxx data_mod email_addr zzz@xxx.xxx NULL Action,tag allowed couples: * data_add,inspire_uid * data_add,email_addr * data_add,full_name * data_add,address * data_add,telephone_[home|office|...] ** data_mod, data_del: same as data_add NOTE: new action/tag can be addedd as needed NOTE: in case of need comment can be used instead of value (which is limited to 255 chars), but it is important to be consistent: if a field is using comment instead of value that _must_ be done _always_. Automated operations: * Table updates: - Update_authornames_table_from_paper id trans_id timestamp userinfo personID action tag value comment xxx xxxxx xxxx-xx-xx daemon -1 UATFP bibsched status NULL Actions: * update_auntornames_table_from_paper: UATFP * authornames_tables_gc: ATGC * update_personid_table_from_paper: UPITFP ''' def get_user_log(transactionid='', userinfo='', personID='', action='', tag='', value='', comment='', only_most_recent=False): ''' Get user log table entry matching all the given parameters; all of them are optional. 
IF no parameters are given retuns the complete log table @param transactionid: id of the transaction @param userinfo: user name or identifier @param personid: id of the person involved @param action: action @param tag: tag @param value: value @param comment: comment ''' sql_query = 'select * from aidUSERINPUTLOG where 1 ' if transactionid: sql_query += ' and transactionid=\'' + str(transactionid) + '\'' if userinfo: sql_query += ' and userinfo=\'' + str(userinfo) + '\'' if personID: sql_query += ' and personid=\'' + str(personID) + '\'' if action: sql_query += ' and action=\'' + str(action) + '\'' if tag: sql_query += ' and tag=\'' + str(tag) + '\'' if value: sql_query += ' and value=\'' + str(value) + '\'' if comment: sql_query += ' and comment=\'' + str(comment) + '\'' if only_most_recent: sql_query += ' order by timestamp desc limit 0,1' return run_sql(sql_query) def insert_user_log(userinfo, personid, action, tag, value, comment='', transactionid=0, timestamp=''): ''' Instert log entries in the user log table. For example of entres look at the table generation script. 
@param userinfo: username or user identifier @type: string @param personid: personid involved in the transaction @type: longint @param action: action type @type: string @param tag: tag @type: string @param value: value for the transaction @type: string @param comment: optional comment for the transaction @type: string @param transactionid: optional id for the transaction @type: longint @return: the transactionid @type: longint ''' if transactionid == 0: transactionid = max(run_sql('SELECT MAX(transactionid) FROM `aidUSERINPUTLOG`')[0][0], -1) + 1 if timestamp: tsui = '\',\'' + str(timestamp) + '\',\'' + str(userinfo) else: tsui = '\',now(),\'' + str(userinfo) run_sql('insert into aidUSERINPUTLOG (transactionid,timestamp,userinfo,personid,' 'action,tag,value,comment) values' '(\'' + str(transactionid) + tsui + '\',\'' + str(personid) + '\',\'' + str(action) + '\',\'' + str(tag) + '\',\'' + str(value) + '\',\'' + str(comment) + '\')') return transactionid def create_persistent_tables(): ''' Creates the personID tables. Separated process from create_database_tables() becouse those would like to be persistent tables while the others are likely to change as the algorithm gets improved. This script is kept here as a development utility, to allow easy db tables creation without need of a reinstall of invenio. ''' # source = open("/opt/cds-invenio/etc/bibauthorid/personid_table_structures.sql", 'r') # query = source.read() # qcount = query.count("CREATE") # if qcount > 0: # run_sql(query) # run_sql("COMMIT;") # bconfig.LOGGER.log(25, "Done. Created %s tables. Please check if there " # "now exist %s tables with the prefix 'aid_'." # % (qcount, qcount)) run_sql(''' -- --WARNING: this creation scripts are carboncopied in miscutil/sql/tabcreate.sql -- to have all the tables created at installation time. -- Remember to propagate any changes! 
-- SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO"; -- -- Table structure for table `aid_personid` -- -- 1 1 paper 100:1234 2 -- 2 5 hepname 123456 1 -- name|affiliation|whatever -- flag values: -- +2: user approved the paper-person assignment -> don't touch! -- +1: Automatically attached by authorid algorithm with a probability of >= .75 -- 0: Automatically attached by authorid algorithm with a probability of >= .5 -- -1: Automatically attached by authorid algorithm with a probability of < .5 -- and serves as an indicator for showing the record on the UI. Prevents -- error-prone merges in the person id creator. -- -2: user disapproved the paper-person assignment -> try to find -- new person in the next iteration of the algo while disregarding -- the paper-person assigned defined by this row -- <-2 or >2: Internal algorithm flags. Free for future use. CREATE TABLE IF NOT EXISTS `aidPERSONID` ( `id` bigint(15) NOT NULL AUTO_INCREMENT, `personid` bigint(15) NOT NULL, `tag` varchar(50) NOT NULL, `data` varchar(250) NOT NULL, `flag` int NOT NULL DEFAULT '0', PRIMARY KEY (`id`), INDEX `personid-b` (`personid`), INDEX `tag-b` (`tag`), INDEX `data-b` (`data`), INDEX `flag-b` (`flag`) ) TYPE=MyISAM; -- -------------------------------------------------------- -- -- Table structure for table `aid_user_input_log` -- -- 1 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 1133:4442 'from 23' -- a paper has been assigned to 1 (flag = 2); before it was of 23 -- 2 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 8147:4442 -- 3 2 2010-09-30 19:35 admin||10.0.0.1 1 reject paper 72:4442 -- paper 72:4442 was surely not written by 1 (flag=-2) -- 4 3 2010-09-30 19:40 admin||10.0.0.1 2 assign paper 1133:4442 -- 5 4 2010-09-30 19:48 admin||10.0.0.1 12 reset paper 1133:4442 'from 12' -- somehow we no longer have info on 1133:4442 (flag=0) -- 6 5 2010-09-30 19:48 admin||10.0.0.1 5 data_add data name:cristoforocolombo 'not sure of the spelling' -- 7 6 2010-09-30 19:48 admin||10.0.0.1 5 data_rem data 
name:cristoforocolombo 'it was wrong' -- 8 7 2010-09-30 19:48 admin||10.0.0.1 6 data_alter data email:aoeu@aoeu.oue 'got new valid address' CREATE TABLE IF NOT EXISTS `aidUSERINPUTLOG` ( `id` bigint(15) NOT NULL AUTO_INCREMENT, `transactionid` bigint(15) NOT NULL, `timestamp` datetime NOT NULL, `userinfo` varchar(255) NOT NULL, `personid` bigint(15) NOT NULL, `action` varchar(50) NOT NULL, `tag` varchar(50) NOT NULL, `value` varchar(200) NOT NULL, `comment` text, PRIMARY KEY (`id`), INDEX `transactionid-b` (`transactionid`), INDEX `timestamp-b` (`timestamp`), INDEX `userinfo-b` (`userinfo`), INDEX `personid-b` (`personid`), INDEX `action-b` (`action`), INDEX `tag-b` (`tag`), INDEX `value-b` (`value`), ) TYPE=MyISAM; ''') def export_personID_to_human_readable_file(filename='/tmp/hrexport.txt', Pids=[]): ''' @deprecated: support for legacy software Export the personID of each document to a human readable file, for brief inspection purposes. @param file: filename to output to @type: string @param Pids: list of persons ids to limit the export @type: (('2',),) ''' if len(Pids) == 0: Pids = run_sql('SELECT DISTINCT `PersonID` FROM `aidPERSONID`')# LIMIT 1,20') destfile = open(filename, 'w') for pid in Pids: if bconfig.TABLES_UTILS_DEBUG: print 'Exporting ' + str(pid) + '...' 
infos = run_sql('SELECT tag,data FROM `aidPERSONID` where PersonID=\'' + str(pid[0]) + '\' and not tag=\'paper\'') docs = run_sql('SELECT `data` FROM `aidPERSONID` where PersonID=\'' + str(pid[0]) + '\' and tag=\'paper\' and flag>=\'-1\'') destfile.write('Person ID: ' + str(pid[0]) + '\n') for info in infos: destfile.write(' info [' + str(info[0]) + ']: ' + str(info[1]) + '\n') for doc in docs: #title = run_sql('SELECT `value` FROM `bib24x` WHERE `id` in \ # ((select id_bibxxx from bibrec_bib24x where id_bibrec=\'' + str(doc[0].split(',')[1]) + '\')) and tag=\'245__a\'') #id = run_sql('SELECT `id_bibxxx` FROM `bibrec_bib' + ('10' if str(doc[0].split(',')[0].split(':')[0]) == '100' else '70') # + 'x` WHERE and `id`=\'' + str(doc[0].split(',')[0].split(':')[1]) + '\'') title = "No title on paper..." try: title = get_record(int(doc[0].split(',')[1]))['245'][0][0][0][1] except (IndexError, KeyError, ValueError): title = "Problem encountered while retrieving document title" dsplit = doc[0].split(',') tnum = "70" if str(dsplit[0].split(':')[0]) == "100": tnum = "10" authorname = run_sql("SELECT value FROM bib%sx " "WHERE id = %s" % (tnum, dsplit[0].split(':')[1])) destfile.write(' name: ' + str(authorname) + ' paper: [' + str(doc[0]) + ']: ' + str(title) + '\n') destfile.write('------------------------------------------------------------------------------\n') destfile.close() def export_personID_to_spires(filename='/tmp/spiresexport.txt', Pids=[]): ''' @deprecated: support for legacy software Export the personID of each document to SPIRES syntax. @param file: filename to output to @type: string @param Pids: list of persons ids to limit the export @type: (('2',),) ''' if len(Pids) == 0: Pids = run_sql('SELECT DISTINCT `PersonID` FROM `aidPERSONID`')# LIMIT 0,20') destfile = open(filename, 'w') for pid in Pids: if bconfig.TABLES_UTILS_DEBUG: print 'Exporting ' + str(pid) + '...' 
docs = run_sql('SELECT `data` FROM `aidPERSONID` where PersonID=\'' + str(pid[0]) + '\' and tag=\'paper\' and flag>=\'-1\'') for doc in docs: f970a = docs = run_sql('SELECT `value` FROM `bib97x` where id=\'' + str(doc[0].split(',')[1]) + '\' and tag=\'970__a\'') dsplit = doc[0].split(',') tnum = "70" if str(dsplit[0].split(':')[0]) == "100": tnum = "10" author_number = run_sql("SELECT field_number FROM bibrec_bib%sx " "WHERE id_bibrec = %s " "AND id_bibxxx = %s" % (tnum, dsplit[1], dsplit[0].split(':')[1])) author_offset = run_sql("SELECT min(field_number) FROM bibrec_bib%sx " "WHERE id_bibrec = %s" % (tnum, dsplit[1])) # print f970a, author_number, doc # if len(author_number) >= 1: # destfile.write('merge = ' + str(f970a[0][0].split('-')[1]) + ';\nastr(' # + str(author_number[0][0]) + ');\nauthor-note(100)=INSPIRE-AUTO-' # + str(pid[0]) + ';\n;\n') if str(doc[0].split(',')[0].split(':')[0]) == '100': author_exp = 1 else: if len(author_number) >= 1: author_exp = author_number[0][0] - author_offset[0][0] + 2 else: if bconfig.TABLES_UTILS_DEBUG: print "No authornumber, setting -1!!!" author_exp = -1 if bconfig.TABLES_UTILS_DEBUG: print f970a, author_number, author_offset, author_exp, doc destfile.write('merge = ' + str(f970a[0][0].split('-')[1]) + ';\nastr(' + str(author_exp) + ');\nauthor-note(100)=INSPIRE-AUTO-' + str(pid[0]) + ';\n;\n') destfile.close() # IRN = <value of 970a>; # ASTR; # A= <author name from record>; # AFF = <affiliation from record 100/700u>; # DESY-AUTHOR = INSPIRE-BIBAUTHOR-<ID from bibauthor>; def user_can_modify_data(uid, pid): ''' Return True if the uid can modify data of this personID, false otherwise. 
@param uid: the user id @type: int @param pid: the person id @type: int ''' pid_uid = run_sql("select data from aidPERSONID where tag = %s and personid = %s", ('uid', str(pid))) if len(pid_uid) >= 1: if str(uid) == str(pid_uid[0][0]): if acc_authorize_action(uid, bconfig.CLAIMPAPER_CHANGE_OWN_DATA)[0] == 0: return True if acc_authorize_action(uid, bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA)[0] == 0: return True return False else: if acc_authorize_action(uid, bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA)[0] == 0: return True return False def user_can_modify_paper(uid, paper): ''' Return True if the uid can modify this paper, false otherwise. If the paper is assigned more then one time (from algorithms) consider the most privileged assignment. @param uid: the user id @type: int @param paper: the paper bibref,bibrec pair x00:1234,4321 @type: str ''' prow = run_sql("select * from aidPERSONID where tag=%s and data =%s" "order by lcul desc limit 0,1", ('paper', str(paper))) if len(prow) == 0: if ((acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)[0] == 0) or (acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)[0] == 0)): return True return False min_req_acc_n = int(prow[0][5]) req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS) pid_uid = run_sql("select data from aidPERSONID where tag = %s and personid = %s", ('uid', str(prow[0][1]))) if len(pid_uid) > 0: if (str(pid_uid[0][0]) != str(uid)) and min_req_acc_n > 0: req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS) if min_req_acc_n < req_acc: min_req_acc_n = req_acc min_req_acc = resolve_paper_access_right(min_req_acc_n) if (acc_authorize_action(uid, min_req_acc)[0] == 0) and (resolve_paper_access_right(min_req_acc) >= min_req_acc_n): return True else: return False def resolve_paper_access_right(acc): ''' Given a string or an integer, resolves to the corresponding integer or string If asked for a wrong/not present parameter falls back to the minimum privilege. 
''' access_dict = {bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE: 0, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS: 25, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS: 50} if isinstance(acc, str): try: return access_dict[acc] except: return 0 inverse_dict = dict([[v, k] for k, v in access_dict.items()]) lower_accs = [a for a in inverse_dict.keys() if a <= acc] try: return inverse_dict[max(lower_accs)] except: return bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE def resolve_data_access_right(acc): ''' Given a string or an integer, resolves to the corresponding integer or string If asked for a wrong/not present parameter falls back to the minimum privilege. ''' access_dict = {bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE: 0, bconfig.CLAIMPAPER_CHANGE_OWN_DATA: 25, bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA: 50} if isinstance(acc, str): try: return access_dict[acc] except: return 0 inverse_dict = dict([[v, k] for k, v in access_dict.items()]) lower_accs = [a for a in inverse_dict.keys() if a <= acc] try: return inverse_dict[max(lower_accs)] except: return bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE def person_bibref_is_touched(pid, bibref): ''' Determines if a record attached to a person has been touched by a human by checking the flag. @param pid: The Person ID of the person to check the assignment from @type pid: int @param bibref: The paper identifier to be checked (e.g. 
"100:12,144") @type bibref: string ''' if not isinstance(pid, int): try: pid = int(pid) except (ValueError, TypeError): raise ValueError("Person ID has to be a number!") if not bibref: raise ValueError("A bibref is expected!") flag = run_sql("SELECT flag FROM aidPERSONID WHERE " "personid = %s AND tag = 'paper' AND data = %s" , (pid, bibref)) try: flag = flag[0][0] except (IndexError): return False if not flag: return False elif - 2 < flag < 2: return False else: return True #action list: #view_pid_world #change_own_data #change_others_dataĹ #claim_own_papers #claim_others_papers def assign_uid_to_person(uid, pid, create_new_pid=False, force=False): ''' Assigns a userid to a person, counterchecknig with get_personid_from_uid. If uid has already other person returns other person. If create_new_pid and the pid is -1 creates a new person. If force, deletes any reference to that uid from the tables and assigns to pid, if pid wrong (less then zero) returns -1. @param uid: user id, int @param pid: person id, int @param create_new_pid: bool @param force, bool ''' def create_new_person(uid): #creates a new person pid = run_sql("select max(personid) from aidPERSONID")[0][0] if pid: try: pid = int(pid) except (ValueError, TypeError): pid = -1 pid += 1 set_person_data(pid, 'uid', str(uid)) return pid if force and pid >= 0: run_sql("delete from aidPERSONID where tag=%s and data=%s", ('uid', uid)) set_person_data(pid, 'uid', str(uid)) return pid elif force and pid < 0: return - 1 current = get_personid_from_uid(((uid,),)) if current[1]: return current[0][0] else: if pid >= 0: cuid = get_person_data(pid, 'uid') if len(cuid) > 0: if str(cuid[0][1]) == str(uid): return pid else: if create_new_pid: create_new_person(uid) else: return - 1 else: set_person_data(pid, 'uid', str(uid)) return pid else: if create_new_pid: create_new_person(uid) else: return - 1 def get_personid_from_uid(uid): ''' Returns the personID associated with the provided ui. 
If the personID is already associated with the person the secon parameter is True, false otherwise. If there is more then one compatible results the persons are listed in order of name compatibility. If no persons are found returns ([-1],False) If there is none, associates on a best effort basis the best matching personid to the uid. @param uid: userID @type uid: ((int,),) ''' pid = run_sql("select * from aidPERSONID where tag=%s and data=%s", ('uid', str(uid[0][0]))) if len(pid) == 1: return ([pid[0][1]], True) else: user_info = collect_user_info(uid[0][0]) try: surname = user_info['external_familyname'] except: return ([-1], False) try: name = user_info['external_firstname'] except: name = '' pid = find_personIDs_by_name_string(create_normalized_name( split_name_parts(surname + ', ' + name))) if len(pid) < 1: return ([-1], False) valid_pids = [] for p in pid: uid = run_sql("select * from aidPERSONID where tag=%s and personid=%s", ('uid', str(p[0]))) if len(uid) == 0: if split_name_parts(surname + ', ' + name)[0].lower() == split_name_parts(p[1][0][0])[0].lower(): valid_pids.append(p[0]) if len(valid_pids) > 0: return (valid_pids, False) else: return ([-1], False) def get_possible_bibrecref(names, bibrec, always_match=False): ''' Returns a list of bibrefs for which the surname is matching @param names: list of names strings @param bibrec: bibrec number @param always_match: match with all the names (full bibrefs list) ''' splitted_names = [] for n in names: splitted_names.append(split_name_parts(n)) bibrec_names_100 = run_sql("select id,value from bib10x where tag='100__a' and id in " "(select id_bibxxx from bibrec_bib10x where id_bibrec=%s)", (str(bibrec),)) bibrec_names_700 = run_sql("select id,value from bib70x where tag='700__a' and id in " "(select id_bibxxx from bibrec_bib70x where id_bibrec=%s)", (str(bibrec),)) bibreflist = [] for b in bibrec_names_100: spb = split_name_parts(b[1]) for n in splitted_names: if (n[0].lower() == spb[0].lower()) or 
always_match: if ['100:' + str(b[0]), b[1]] not in bibreflist: bibreflist.append(['100:' + str(b[0]), b[1]]) for b in bibrec_names_700: spb = split_name_parts(b[1]) for n in splitted_names: if (n[0].lower() == spb[0].lower()) or always_match: if ['700:' + str(b[0]), b[1]] not in bibreflist: bibreflist.append(['700:' + str(b[0]), b[1]]) return bibreflist def get_possible_personids_from_paperlist(bibrecreflist): ''' @param bibrecreflist: list of bibrecref couples, (('100:123,123',),) returns a list of pids and connected bibrefs in order of number of bibrefs per pid [ [['1'],['123:123.123','123:123.123']] , [['2'],['123:123.123']] ] ''' pid_bibrecref_dict = {} for b in bibrecreflist: pids = run_sql("select personid from aidPERSONID where tag=%s and data=%s", ('paper', str(b[0]))) for pid in pids: if pid[0] in pid_bibrecref_dict: pid_bibrecref_dict[pid[0]].append(str(b[0])) else: pid_bibrecref_dict[pid[0]] = [str(b[0])] pid_list = [[i, pid_bibrecref_dict[i]] for i in pid_bibrecref_dict] return sorted(pid_list, key=lambda k: len(k[1]), reverse=True) def get_processed_external_recids(pid): db_data = get_person_data(pid, "processed_external_recids") recid_list_str = '' if db_data and db_data[0] and db_data[0][1]: recid_list_str = db_data[0][1] return recid_list_str def set_processed_external_recids(pid, recid_list_str): del_person_data(pid, "processed_external_recids") set_person_data(pid, "processed_external_recids", recid_list_str) diff --git a/modules/bibauthorid/lib/bibauthorid_webapi.py b/modules/bibauthorid/lib/bibauthorid_webapi.py index 303ecedb3..66cba635b 100644 --- a/modules/bibauthorid/lib/bibauthorid_webapi.py +++ b/modules/bibauthorid/lib/bibauthorid_webapi.py @@ -1,612 +1,631 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' Bibauthorid_webapi Point of access to the documents clustering facility. Provides utilities to safely interact with stored data. ''' import invenio.bibauthorid_personid_tables_utils as tu from invenio.dbquery import OperationalError from invenio.access_control_admin import acc_find_user_role_actions from cgi import escape from time import gmtime, strftime def get_bibrefs_from_bibrecs(bibreclist): lists = [] for bibrec in bibreclist: lists.append([bibrec, tu.get_possible_bibrecref([''], bibrec, always_match=True)]) return lists def get_possible_bibrefs_from_pid_bibrec(pid, bibreclist, always_match=False): ''' Returns for each bibrec a list of bibrefs for which the surname matches. 
@param pid: person id to gather the names strings from @param bibreclist: list of bibrecs on which to search ''' pid = _wash_integer_id(pid) pid_names = tu.get_person_names_set([pid]) lists = [] for bibrec in bibreclist: lists.append([bibrec, tu.get_possible_bibrecref([n[0] for n in pid_names], bibrec, always_match)]) return lists def get_pid_from_uid(uid): ''' Return the PID associated with the uid ''' if not isinstance(uid, tuple): uid = ((uid,),) return tu.get_personid_from_uid(uid) def get_user_level(uid): ''' Finds and returns the aid-universe-internal numeric user level @param uid: the user's id @type uid: int @return: A numerical representation of the maximum access level of a user @rtype: int ''' actions = [row[1] for row in acc_find_user_role_actions({'uid': uid})] return max([tu.resolve_paper_access_right(acc) for acc in actions]) def get_person_id_from_paper(bibref=None): ''' Returns the id of the person who wrote the paper @param bibref: the bibref,bibrec pair that identifies the person @type bibref: str @return: the person id @rtype: int ''' if not _is_valid_bibref(bibref): return - 1 person_id = -1 db_data = tu.get_papers_status([(bibref,)]) try: person_id = db_data[0][1] except (IndexError): pass return person_id def get_papers_by_person_id(person_id= -1, rec_status= -2): ''' Returns all the papers written by the person @param person_id: identifier of the person to retrieve papers from @type person_id: int @param rec_status: minimal flag status a record must have to be displayed @type rec_status: int @return: list of record ids @rtype: list of int ''' if not isinstance(person_id, int): try: person_id = int(person_id) except (ValueError, TypeError): return [] if person_id < 0: return [] if not isinstance(rec_status, int): return [] db_data = tu.get_person_papers((person_id,), rec_status, show_author_name=True, show_title=False) records = [[row["data"].split(",")[1], row["data"], row["flag"], row["authorname"]] for row in db_data] return records def 
get_papers_cluster(bibref): ''' Returns the cluster of documents connected with this one @param bibref: the table:bibref,bibrec pair to look for @type bibref: str @return: a list of record IDs @rtype: list of int ''' papers = [] person_id = get_person_id_from_paper(bibref) if person_id > -1: papers = get_papers_by_person_id(person_id) return papers def get_person_names_from_id(person_id= -1): ''' Finds and returns the names associated with this person along with the frequency of occurrence (i.e. the number of papers) @param person_id: an id to find the names for @type person_id: int @return: name and number of occurrences of the name @rtype: tuple of tuple ''' # #retrieve all rows for the person if (not person_id > -1) or (not isinstance(person_id, int)): return [] return tu.get_person_names_count((person_id,)) +def get_person_db_names_from_id(person_id= -1): + ''' + Finds and returns the names associated with this person as stored in the + meta data of the underlying data set along with the + frequency of occurrence (i.e. 
the number of papers) + + @param person_id: an id to find the names for + @type person_id: int + + @return: name and number of occurrences of the name + @rtype: tuple of tuple + ''' +# #retrieve all rows for the person + if (not person_id > -1) or (not isinstance(person_id, int)): + return [] + + return tu.get_person_db_names_count((person_id,)) + + def get_paper_status(person_id, bibref): ''' Finds an returns the status of a bibrec to person assignment @param person_id: the id of the person to check against @type person_id: int @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string ''' db_data = tu.get_papers_status([[bibref]]) #data,PersonID,flag status = None try: status = db_data[0][2] except IndexError: status = -10 status = _wash_integer_id(status) return status def _wash_integer_id(param_id): ''' Creates an int out of either int or string @param param_id: the number to be washed @type param_id: int or string @return: The int representation of the param or -1 @rtype: int ''' pid = -1 try: pid = int(param_id) except (ValueError, TypeError): return (-1) return pid def _is_valid_bibref(bibref): ''' Determines if the provided string is a valid bibref-bibrec pair @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string @return: True if it is a bibref-bibrec pair and False if it's not @rtype: boolean ''' if (not isinstance(bibref, str)) or (not bibref): return False if not bibref.count(":"): return False if not bibref.count(","): return False try: table = bibref.split(":")[0] ref = bibref.split(":")[1].split(",")[0] bibrec = bibref.split(":")[1].split(",")[1] except IndexError: return False try: table = int(table) ref = int(ref) bibrec = int(bibrec) except (ValueError, TypeError): return False return True def confirm_person_bibref_assignments(person_id, bibrefs, uid): ''' Confirms a bibref-bibrec assignment to a person. 
That internally raises the flag of the entry to 2, which means 'user confirmed' and sets the user level to the highest level of the user provided as param @param person_id: the id of the person to confirm the assignment to @type person_id: int @param bibrefs: the bibref-bibrec pairs that unambiguously identify records @type bibrefs: list of strings @param uid: the id of the user that arranges the confirmation @type uid: int @return: True if the process ran smoothly, False if there was an error @rtype: boolean ''' pid = _wash_integer_id(person_id) refs = [] if pid < 0: return False if not isinstance(bibrefs, list) or not len(bibrefs): return False else: for bibref in bibrefs: if _is_valid_bibref(bibref): refs.append((bibref,)) else: return False try: tu.confirm_papers_to_person((pid,), refs, get_user_level(uid)) except OperationalError: return False return True def repeal_person_bibref_assignments(person_id, bibrefs, uid): ''' Repeals a bibref-bibrec assignment from a person. That internally sets the flag of the entry to -2, which means 'user repealed' and sets the user level to the highest level of the user provided as param @param person_id: the id of the person to repeal the assignment from @type person_id: int @param bibrefs: the bibref-bibrec pairs that unambiguously identify records @type bibrefs: list of strings @param uid: the id of the user that arranges the repulsion @type uid: int @return: True if the process ran smoothly, False if there was an error @rtype: boolean ''' pid = _wash_integer_id(person_id) refs = [] if pid < 0: return False if not isinstance(bibrefs, list) or not len(bibrefs): return False else: for bibref in bibrefs: if _is_valid_bibref(bibref): refs.append((bibref,)) else: return False try: tu.reject_papers_from_person((pid,), refs, get_user_level(uid)) except OperationalError: return False return True def reset_person_bibref_decisions(person_id, bibrefs): ''' Resets a bibref-bibrec assignment of a person. 
That internally sets the flag of the entry to 0, which means 'no user interaction' and sets the user level to 0 to give the record free for claiming/curation @param person_id: the id of the person to reset the assignment from @type person_id: int @param bibrefs: the bibref-bibrec pairs that unambiguously identify records @type bibrefs: list of strings @return: True if the process ran smoothly, False if there was an error @rtype: boolean ''' pid = _wash_integer_id(person_id) refs = [] if pid < 0: return False if not isinstance(bibrefs, list) or not len(bibrefs): return False else: for bibref in bibrefs: if _is_valid_bibref(bibref): refs.append((bibref,)) else: return False try: tu.reset_papers_flag((person_id,), refs) except OperationalError: return False return True def add_person_comment(person_id, message): ''' Adds a comment to a person after enriching it with meta-data (date+time) @param person_id: person id to assign the comment to @type person_id: int @param message: defines the comment to set @type message: string @return the message incl. the metadata if everything was fine, False on err @rtype: string or boolean ''' msg = "" pid = -1 try: msg = str(message) pid = int(person_id) except (ValueError, TypeError): return False strtimestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime()) msg = escape(msg, quote=True) dbmsg = "%s;;;%s" % (strtimestamp, msg) tu.set_person_data(pid, "comment", dbmsg) return dbmsg def get_person_comments(person_id): ''' Get all comments from a person @param person_id: person id to get the comments from @type person_id: int @return the message incl. 
the metadata if everything was fine, False on err @rtype: string or boolean ''' pid = -1 comments = [] try: pid = int(person_id) except (ValueError, TypeError): return False for row in tu.get_person_data(pid, "comment"): comments.append(row[1]) return comments def search_person_ids_by_name(namequery): ''' Prepares the search to search in the database @param namequery: the search query the user enquired @type namequery: string @return: information about the result w/ probability and occurrence @rtype: tuple of tuple ''' query = "" escaped_query = "" try: query = str(namequery) except (ValueError, TypeError): return [] if query: escaped_query = escape(query, quote=True) else: return [] return tu.find_personIDs_by_name_string(escaped_query) def log(userinfo, personid, action, tag, value, comment='', transactionid=0): ''' Log an action performed by a user Examples (in the DB): 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 1133:4442 'from 23' 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 8147:4442 2 2010-09-30 19:35 admin||10.0.0.1 1 reject paper 72:4442 @param userinfo: information about the user [UID|IP] @type userinfo: string @param personid: ID of the person this action is targeting @type personid: int @param action: intended action @type action: string @param tag: A tag to describe the data entered @type tag: string @param value: The value of the action described by the tag @type value: string @param comment: Optional comment to describe the transaction @type comment: string @param transactionid: May group bulk operations together @type transactionid: int @return: Returns the current transactionid @rtype: int ''' userinfo = escape(str(userinfo)) action = escape(str(action)) tag = escape(str(tag)) value = escape(str(value)) comment = escape(str(comment)) if not isinstance(personid, int): try: personid = int(personid) except (ValueError, TypeError): return - 1 if not isinstance(transactionid, int): try: transactionid = int(transactionid) except (ValueError, 
TypeError): return - 1 return tu.insert_user_log(userinfo, personid, action, tag, value, comment, transactionid) def user_can_modify_data(uid, pid): ''' Determines if a user may modify the data of a person @param uid: the id of a user (invenio user id) @type uid: int @param pid: the id of a person @type pid: int @return: True if the user may modify data, False if not @rtype: boolean @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(uid, int): try: uid = int(uid) except (ValueError, TypeError): raise ValueError("User ID has to be a number!") if not isinstance(pid, int): try: pid = int(pid) except (ValueError, TypeError): raise ValueError("Person ID has to be a number!") return tu.user_can_modify_data(uid, pid) def user_can_modify_paper(uid, paper): ''' Determines if a user may modify the record assignments of a person @param uid: the id of a user (invenio user id) @type uid: int @param pid: the id of a person @type pid: int @return: True if the user may modify data, False if not @rtype: boolean @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(uid, int): try: uid = int(uid) except (ValueError, TypeError): raise ValueError("User ID has to be a number!") if not paper: raise ValueError("A bibref is expected!") return tu.user_can_modify_paper(uid, paper) def person_bibref_is_touched(pid, bibref): ''' Determines if an assignment has been touched by a user (i.e. 
check for the flag of an assignment being 2 or -2) @param pid: the id of the person to check against @type pid: int @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(pid, int): try: pid = int(pid) except (ValueError, TypeError): raise ValueError("Person ID has to be a number!") if not bibref: raise ValueError("A bibref is expected!") return tu.person_bibref_is_touched(pid, bibref) def assign_uid_to_person(uid, pid, create_new_pid=False): pid = _wash_integer_id(pid) uid = _wash_integer_id(uid) tu.assign_uid_to_person(uid, pid, create_new_pid) def get_review_needing_records(pid): pid = _wash_integer_id(pid) db_data = tu.get_person_papers_to_be_manually_reviewed(pid) return [int(row[1]) for row in db_data if row[1]] def add_review_needing_record(pid, bibrec_id): pid = _wash_integer_id(pid) bibrec_id = _wash_integer_id(bibrec_id) tu.add_person_paper_needs_manual_review(pid, bibrec_id) def del_review_needing_record(pid, bibrec_id): pid = _wash_integer_id(pid) bibrec_id = _wash_integer_id(bibrec_id) tu.del_person_papers_needs_manual_review(pid, bibrec_id) def get_processed_external_recids(pid): list_str = tu.get_processed_external_recids(pid) return list_str.split(";") def set_processed_external_recids(pid, recid_list): if isinstance(recid_list, list): recid_list_str = ";".join(recid_list) tu.set_processed_external_recids(pid, recid_list_str) diff --git a/modules/bibformat/lib/elements/bfe_authors.py b/modules/bibformat/lib/elements/bfe_authors.py index d099d73df..2a2cc8711 100644 --- a/modules/bibformat/lib/elements/bfe_authors.py +++ b/modules/bibformat/lib/elements/bfe_authors.py @@ -1,146 +1,147 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. 
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibFormat element - Prints authors
"""
__revision__ = "$Id$"


def format_element(bfo, limit, separator=' ; ',
                   extension='[...]',
                   print_links="yes",
                   print_affiliations='no',
                   affiliation_prefix=' (',
                   affiliation_suffix=')',
                   interactive="no",
                   highlight="no",
                   link_author_pages="no"):
    """
    Prints the list of authors of a record.

    @param limit: the maximum number of authors to display
    @param separator: the separator between authors.
    @param extension: a text printed if more authors than 'limit' exist
    @param print_links: if yes, prints the authors as HTML link to their publications
    @param print_affiliations: if yes, make each author name followed by its affiliation
    @param affiliation_prefix: prefix printed before each affiliation
    @param affiliation_suffix: suffix printed after each affiliation
    @param interactive: if yes, enable user to show/hide authors when there are too many (html + javascript)
    @param highlight: highlights authors corresponding to search query if set to 'yes'
    @param link_author_pages: if yes, link author names to their author pages instead of a publication search
    """
    from urllib import quote
    from cgi import escape
    from invenio.config import CFG_SITE_URL
    from invenio.messages import gettext_set_language

    _ = gettext_set_language(bfo.lang)    # load the right message language

    authors = []
    authors_1 = bfo.fields('100__')
    authors_2 = bfo.fields('700__')

    authors.extend(authors_1)
    authors.extend(authors_2)

    nb_authors = len(authors)
    bibrec_id = bfo.control_field("001")

    # Process authors to add link, highlight and format affiliation
    for author in authors:

        if author.has_key('a'):
            if highlight == 'yes':
                from invenio import bibformat_utils
                author['a'] = bibformat_utils.highlight(author['a'],
                                                        bfo.search_pattern)

            if print_links.lower() == "yes":
                # Consistency fix: honor case-insensitive values for
                # link_author_pages, as already done for print_links.
                if link_author_pages.lower() == "no":
                    author['a'] = '<a href="' + CFG_SITE_URL + \
                                  '/search?f=author&p=' + quote(author['a']) + \
                                  '&ln=' + bfo.lang + \
                                  '">' + escape(author['a']) + '</a>'
                else:
                    # link to the author page, carrying the record id along
                    author['a'] = '<a href="' + CFG_SITE_URL + \
                                  '/author/' + quote(author['a']) + \
                                  '?recid=' + bibrec_id + \
                                  '&ln=' + bfo.lang + \
                                  '">' + escape(author['a']) + '</a>'

        if author.has_key('u'):
            if print_affiliations == "yes":
                author['u'] = affiliation_prefix + author['u'] + \
                              affiliation_suffix

    # Flatten author instances
    if print_affiliations == 'yes':
        authors = [author.get('a', '') + author.get('u', '')
                   for author in authors]
    else:
        authors = [author.get('a', '')
                   for author in authors]

    if limit.isdigit() and nb_authors > int(limit) and interactive != "yes":
        return separator.join(authors[:int(limit)]) + extension

    elif limit.isdigit() and nb_authors > int(limit) and interactive == "yes":
        out = '''
        <script type="text/javascript">
        function toggle_authors_visibility(){
            var more = document.getElementById('more');
            var link = document.getElementById('link');
            var extension = document.getElementById('extension');
            if (more.style.display=='none'){
                more.style.display = '';
                extension.style.display = 'none';
                link.innerHTML = "%(show_less)s"
            } else {
                more.style.display = 'none';
                extension.style.display = '';
                link.innerHTML = "%(show_more)s"
            }
            link.style.color = "rgb(204,0,0);"
        }

        function set_up(){
            var extension = document.getElementById('extension');
            extension.innerHTML = "%(extension)s";
            toggle_authors_visibility();
        }

        </script>
        ''' % {'show_less':_("Hide"),
               'show_more':_("Show all %i authors") % nb_authors,
               'extension':extension}

        out += '<a name="show_hide" />'
        out += separator.join(authors[:int(limit)])
        out += '<span id="more" style="">' + separator + \
               separator.join(authors[int(limit):]) + '</span>'
        out += ' <span id="extension"></span>'
        out += ' <small><i><a id="link" href="#" onclick="toggle_authors_visibility()" style="color:rgb(204,0,0);"></a></i></small>'
        out += '<script type="text/javascript">set_up()</script>'

        return out
    elif nb_authors > 0:
        return separator.join(authors)


def escape_values(bfo):
    """
    Called by BibFormat in order to check if output of this element
    should be escaped.
    """
    return 0
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 __revision__ = "$Id$" import time import cgi import string import re import locale from urllib import quote, urlencode from xml.sax.saxutils import escape as xml_escape from invenio.config import \ CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_SPLIT_BY_COLLECTION, \ CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ CFG_BIBRANK_SHOW_READING_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_BIBRANK_SHOW_CITATION_STATS, \ CFG_BIBRANK_SHOW_CITATION_GRAPHS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_VERSION, \ CFG_SITE_URL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_ADMIN_EMAIL, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_WEBCOMMENT_ALLOW_COMMENTS, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_SHOW_COMMENT_COUNT, \ CFG_WEBSEARCH_SHOW_REVIEW_COUNT from invenio.dbquery import run_sql from invenio.messages import gettext_set_language from 
invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url from invenio.htmlutils import nmtoken_from_string from invenio.webinterface_handler import wash_urlargd from invenio.bibrank_citation_searcher import get_cited_by_count from invenio.intbitset import intbitset from invenio.websearch_external_collections import external_collection_get_state, get_external_collection_engine from invenio.websearch_external_collections_utils import get_collection_id from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS _RE_PUNCTUATION = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) _RE_SPACES = re.compile(r"\s+") def get_fieldvalues(recID, tag): """Return list of field values for field TAG inside record RECID. FIXME: should be imported commonly for search_engine too.""" out = [] if tag == "001___": # we have asked for recID that is not stored in bibXXx tables out.append(str(recID)) else: # we are going to look inside bibXXx tables digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \ "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out class Template: # This dictionary maps Invenio language code to locale codes (ISO 639) tmpl_localemap = { 'bg': 'bg_BG', 'ca': 'ca_ES', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', 'pt': 'pt_BR', 'fr': 'fr_FR', 'it': 'it_IT', 'ka': 'ka_GE', 'ro': 'ro_RO', 'ru': 'ru_RU', 'rw': 'rw_RW', 'sk': 'sk_SK', 'cs': 'cs_CZ', 'no': 'no_NO', 'sv': 'sv_SE', 'uk': 'uk_UA', 'ja': 'ja_JA', 'pl': 'pl_PL', 'hr': 'hr_HR', 'zh_CN': 'zh_CN', 'zh_TW': 'zh_TW', 'hu': 'hu_HU', 'af': 'af_ZA', 'gl': 'gl_ES' } tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure # Type of the allowed parameters for the web interface for search 
results search_results_default_urlargd = { 'cc': (str, CFG_SITE_NAME), 'c': (list, []), 'p': (str, ""), 'f': (str, ""), 'rg': (int, CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS), 'sf': (str, ""), 'so': (str, "d"), 'sp': (str, ""), 'rm': (str, ""), 'of': (str, "hb"), 'ot': (list, []), 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""), 'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""), 'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""), 'sc': (int, 0), 'jrec': (int, 0), 'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""), 'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""), 'action': (str, "search"), 'action_search': (str, ""), 'action_browse': (str, ""), 'd1': (str, ""), 'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0), 'd2': (str, ""), 'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0), 'dt': (str, ""), 'ap': (int, 1), 'verbose': (int, 0), 'ec': (list, []), } # ...and for search interfaces search_interface_default_urlargd = { 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0)} # ...and for RSS feeds rss_default_urlargd = {'c' : (list, []), 'cc' : (str, ""), 'p' : (str, ""), 'f' : (str, ""), 'p1' : (str, ""), 'f1' : (str, ""), 'm1' : (str, ""), 'op1': (str, ""), 'p2' : (str, ""), 'f2' : (str, ""), 'm2' : (str, ""), 'op2': (str, ""), 'p3' : (str, ""), 'f3' : (str, ""), 'm3' : (str, "")} tmpl_openurl_accepted_args = { 'id' : (list, []), 'genre' : (str, ''), 'aulast' : (str, ''), 'aufirst' : (str, ''), 'auinit' : (str, ''), 'auinit1' : (str, ''), 'auinitm' : (str, ''), 'issn' : (str, ''), 'eissn' : (str, ''), 'coden' : (str, ''), 'isbn' : (str, ''), 'sici' : (str, ''), 'bici' : (str, ''), 'title' : (str, ''), 'stitle' : (str, ''), 'atitle' : (str, ''), 'volume' : (str, ''), 'part' : (str, ''), 'issue' : (str, ''), 'spage' : (str, ''), 'epage' : (str, ''), 
'pages' : (str, ''), 'artnum' : (str, ''), 'date' : (str, ''), 'ssn' : (str, ''), 'quarter' : (str, ''), 'url_ver' : (str, ''), 'ctx_ver' : (str, ''), 'rft_val_fmt' : (str, ''), 'rft_id' : (list, []), 'rft.atitle' : (str, ''), 'rft.title' : (str, ''), 'rft.jtitle' : (str, ''), 'rft.stitle' : (str, ''), 'rft.date' : (str, ''), 'rft.volume' : (str, ''), 'rft.issue' : (str, ''), 'rft.spage' : (str, ''), 'rft.epage' : (str, ''), 'rft.pages' : (str, ''), 'rft.artnumber' : (str, ''), 'rft.issn' : (str, ''), 'rft.eissn' : (str, ''), 'rft.aulast' : (str, ''), 'rft.aufirst' : (str, ''), 'rft.auinit' : (str, ''), 'rft.auinit1' : (str, ''), 'rft.auinitm' : (str, ''), 'rft.ausuffix' : (str, ''), 'rft.au' : (list, []), 'rft.aucorp' : (str, ''), 'rft.isbn' : (str, ''), 'rft.coden' : (str, ''), 'rft.sici' : (str, ''), 'rft.genre' : (str, 'unknown'), 'rft.chron' : (str, ''), 'rft.ssn' : (str, ''), 'rft.quarter' : (int, ''), 'rft.part' : (str, ''), 'rft.btitle' : (str, ''), 'rft.isbn' : (str, ''), 'rft.atitle' : (str, ''), 'rft.place' : (str, ''), 'rft.pub' : (str, ''), 'rft.edition' : (str, ''), 'rft.tpages' : (str, ''), 'rft.series' : (str, ''), } tmpl_opensearch_rss_url_syntax = "%(CFG_SITE_URL)s/rss?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}&startIndex" % {'CFG_SITE_URL': CFG_SITE_URL} tmpl_opensearch_html_url_syntax = "%(CFG_SITE_URL)s/search?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}&startIndex" % {'CFG_SITE_URL': CFG_SITE_URL} def tmpl_openurl2invenio(self, openurl_data): """ Return an Invenio url corresponding to a search with the data included in the openurl form map. 
""" def isbn_to_isbn13_isbn10(isbn): isbn = isbn.replace(' ', '').replace('-', '') if len(isbn) == 10 and isbn.isdigit(): ## We already have isbn10 return ('', isbn) if len(isbn) != 13 and isbn.isdigit(): return ('', '') isbn13, isbn10 = isbn, isbn[3:-1] checksum = 0 weight = 10 for char in isbn10: checksum += int(char) * weight weight -= 1 checksum = 11 - (checksum % 11) if checksum == 10: isbn10 += 'X' if checksum == 11: isbn10 += '0' else: isbn10 += str(checksum) return (isbn13, isbn10) from invenio.search_engine import perform_request_search doi = '' pmid = '' bibcode = '' oai = '' issn = '' isbn = '' for elem in openurl_data['id']: if elem.startswith('doi:'): doi = elem[len('doi:'):] elif elem.startswith('pmid:'): pmid = elem[len('pmid:'):] elif elem.startswith('bibcode:'): bibcode = elem[len('bibcode:'):] elif elem.startswith('oai:'): oai = elem[len('oai:'):] for elem in openurl_data['rft_id']: if elem.startswith('info:doi/'): doi = elem[len('info:doi/'):] elif elem.startswith('info:pmid/'): pmid = elem[len('info:pmid/'):] elif elem.startswith('info:bibcode/'): bibcode = elem[len('info:bibcode/'):] elif elem.startswith('info:oai/'): oai = elem[len('info:oai/')] elif elem.startswith('urn:ISBN:'): isbn = elem[len('urn:ISBN:'):] elif elem.startswith('urn:ISSN:'): issn = elem[len('urn:ISSN:'):] ## Building author query aulast = openurl_data['rft.aulast'] or openurl_data['aulast'] aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst'] auinit = openurl_data['rft.auinit'] or \ openurl_data['auinit'] or \ openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm'] or \ openurl_data['auinit1'] + ' ' + openurl_data['auinitm'] or aufirst[:1] auinit = auinit.upper() if aulast and aufirst: author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit) elif aulast and auinit: author_query = 'author:"%s, %s"' % (aulast, auinit) else: author_query = '' ## Building title query title = openurl_data['rft.atitle'] or \ 
openurl_data['atitle'] or \ openurl_data['rft.btitle'] or \ openurl_data['rft.title'] or \ openurl_data['title'] if title: title_query = 'title:"%s"' % title title_query_cleaned = 'title:"%s"' % _RE_SPACES.sub(' ', _RE_PUNCTUATION.sub(' ', title)) else: title_query = '' ## Building journal query jtitle = openurl_data['rft.stitle'] or \ openurl_data['stitle'] or \ openurl_data['rft.jtitle'] or \ openurl_data['title'] if jtitle: journal_query = 'journal:"%s"' % jtitle else: journal_query = '' ## Building isbn query isbn = isbn or openurl_data['rft.isbn'] or \ openurl_data['isbn'] isbn13, isbn10 = isbn_to_isbn13_isbn10(isbn) if isbn13: isbn_query = 'isbn:"%s" or isbn:"%s"' % (isbn13, isbn10) elif isbn10: isbn_query = 'isbn:"%s"' % isbn10 else: isbn_query = '' ## Building issn query issn = issn or openurl_data['rft.eissn'] or \ openurl_data['eissn'] or \ openurl_data['rft.issn'] or \ openurl_data['issn'] if issn: issn_query = 'issn:"%s"' % issn else: issn_query = '' ## Building coden query coden = openurl_data['rft.coden'] or openurl_data['coden'] if coden: coden_query = 'coden:"%s"' % coden else: coden_query = '' ## Building doi query if False: #doi: #FIXME Temporaly disabled until doi field is properly setup doi_query = 'doi:"%s"' % doi else: doi_query = '' ## Trying possible searches if doi_query: if perform_request_search(p=doi_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : doi_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if isbn_query: if perform_request_search(p=isbn_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : isbn_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if coden_query: if perform_request_search(p=coden_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : coden_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if author_query and title_query: if perform_request_search(p='%s and %s' % (title_query, author_query)): return '%s/search?%s' % (CFG_SITE_URL, 
urlencode({ 'p' : '%s and %s' % (title_query, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if title_query: result = len(perform_request_search(p=title_query)) if result == 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) elif result > 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'})) ## Nothing worked, let's return a search that the user can improve if author_query and title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : '%s and %s' % (title_query_cleaned, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) elif title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : title_query_cleaned, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) else: ## Mmh. Too few information provided. return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : 'recid:-1', 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) def tmpl_opensearch_description(self, ln): """ Returns the OpenSearch description file of this site. 
""" _ = gettext_set_language(ln) return """<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" xmlns:moz="http://www.mozilla.org/2006/browser/search/"> <ShortName>%(short_name)s</ShortName> <LongName>%(long_name)s</LongName> <Description>%(description)s</Description> <InputEncoding>UTF-8</InputEncoding> <OutputEncoding>UTF-8</OutputEncoding> <Language>*</Language> <Contact>%(CFG_SITE_ADMIN_EMAIL)s</Contact> <Query role="example" searchTerms="a" /> <Developer>Powered by Invenio</Developer> <Url type="text/html" indexOffset="1" rel="results" template="%(html_search_syntax)s" /> <Url type="application/rss+xml" indexOffset="1" rel="results" template="%(rss_search_syntax)s" /> <Url type="application/opensearchdescription+xml" rel="self" template="%(CFG_SITE_URL)s/opensearchdescription" /> <moz:SearchForm>%(CFG_SITE_URL)s</moz:SearchForm> </OpenSearchDescription>""" % \ {'CFG_SITE_URL': CFG_SITE_URL, 'short_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)[:16], 'long_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'description': (_("Search on %(x_CFG_SITE_NAME_INTL)s") % \ {'x_CFG_SITE_NAME_INTL': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)})[:1024], 'CFG_SITE_ADMIN_EMAIL': CFG_SITE_ADMIN_EMAIL, 'rss_search_syntax': self.tmpl_opensearch_rss_url_syntax, 'html_search_syntax': self.tmpl_opensearch_html_url_syntax } def build_search_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search url. 'known_parameters' is the list of query parameters you inherit from your current query. You can then pass keyword arguments to modify this query. build_search_url(known_parameters, of="xm") The generated URL is absolute. 
""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] # Asking for a recid? Return a /record/<recid> URL if 'recid' in parameters: target = "%s/record/%s" % (CFG_SITE_URL, parameters['recid']) del parameters['recid'] target += make_canonical_urlargd(parameters, self.search_results_default_urlargd) return target return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(parameters, self.search_results_default_urlargd)) def build_search_interface_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search interface URL.""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) c = parameters['c'] del parameters['c'] # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] if c and c != CFG_SITE_NAME: base = CFG_SITE_URL + '/collection/' + quote(c) else: base = CFG_SITE_URL return create_url(base, parameters) def build_rss_url(self, known_parameters, **kargs): """Helper for generating a canonical RSS URL""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Keep only interesting parameters argd = wash_urlargd(parameters, self.rss_default_urlargd) if argd: # Handle 'c' differently since it is a list c = argd.get('c', []) del argd['c'] # Create query, and drop empty params args = make_canonical_urlargd(argd, self.rss_default_urlargd) if c != []: # Add collections c = [quote(coll) for coll in c] if args == '': args += '?' 
else: args += '&' args += 'c=' + '&c='.join(c) return CFG_SITE_URL + '/rss' + args def tmpl_record_page_header_content(self, req, recid, ln): """ Provide extra information in the header of /record pages """ _ = gettext_set_language(ln) title = get_fieldvalues(recid, "245__a") if title: title = cgi.escape(title[0]) else: title = _("Record") + ' #%d' % recid keywords = ', '.join(get_fieldvalues(recid, "6531_a")) description = ' '.join(get_fieldvalues(recid, "520__a")) description += "\n" description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a")) return [cgi.escape(x, True) for x in (title, description, keywords)] def tmpl_navtrail_links(self, aas, ln, dads): """ Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*) Parameters: - 'aas' *int* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'separator' *string* - The separator between two consecutive collections - 'dads' *list* - A list of parent links, eachone being a dictionary of ('name', 'longname') """ out = [] for url, name in dads: args = {'c': url, 'as': aas, 'ln': ln} out.append(create_html_link(self.build_search_interface_url(**args), {}, cgi.escape(name), {'class': 'navtrail'})) return ' > '.join(out) def tmpl_webcoll_body(self, ln, collection, te_portalbox, searchfor, np_portalbox, narrowsearch, focuson, instantbrowse, ne_portalbox): """ Creates the body of the main search page. 
        Parameters:

          - 'ln' *string* - language of the page being generated

          - 'collection' - collection id of the page being generated

          - 'te_portalbox' *string* - The HTML code for the portalbox on top of search

          - 'searchfor' *string* - The HTML code for the search for box

          - 'np_portalbox' *string* - The HTML code for the portalbox on bottom of search

          - 'narrowsearch' *string* - The HTML code for the search categories (left bottom of page)

          - 'focuson' *string* - The HTML code for the "focuson" categories (right bottom of page)

          - 'ne_portalbox' *string* - The HTML code for the bottom of the page
        """
        # fall back to the instant-browse content when no narrow-search box was built
        if not narrowsearch:
            narrowsearch = instantbrowse

        body = '''
                <form name="search" action="%(siteurl)s/search" method="get">
                %(searchfor)s
                %(np_portalbox)s
                <table cellspacing="0" cellpadding="0" border="0" class="narrowandfocusonsearchbox">
                  <tr>
                    <td valign="top">%(narrowsearch)s</td>
               ''' % {
                 'siteurl' : CFG_SITE_URL,
                 'searchfor' : searchfor,
                 'np_portalbox' : np_portalbox,
                 'narrowsearch' : narrowsearch,
               }
        if focuson:
            body += """<td valign="top">""" + focuson + """</td>"""
        body += """</tr></table>
            %(ne_portalbox)s
               </form>""" % {'ne_portalbox' : ne_portalbox}
        return body

    def tmpl_portalbox(self, title, body):
        """Creates portalboxes based on the parameters

        Parameters:

          - 'title' *string* - The title of the box

          - 'body' *string* - The HTML code for the body of the box
        """
        out = """<div class="portalbox">
                    <div class="portalboxheader">%(title)s</div>
                    <div class="portalboxbody">%(body)s</div>
                 </div>""" % {'title' : cgi.escape(title), 'body' : body}
        return out

    def tmpl_searchfor_light(self, ln, collection_id, collection_name, record_count, example_search_queries): # EXPERIMENTAL
        """Produces light *Search for* box for the current collection.

        Parameters:

          - 'ln' *string* - *str* The language to display

          - 'collection_id' - *str* The collection id

          - 'collection_name' - *str* The collection name in current language

          - 'example_search_queries' - *list* List of search queries given as example for this collection
        """
        # load the right message language
        _ = gettext_set_language(ln)

        out = '''
        <!--create_searchfor_light()-->
        '''
        argd = drop_default_urlargd({'ln': ln, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION},
                                    self.search_results_default_urlargd)

        # Only add non-default hidden values
        for field, value in argd.items():
            out += self.tmpl_input_hidden(field, value)

        header = _("Search %s records for:") % \
                 self.tmpl_nbrecs_info(record_count, "", "")
        asearchurl = self.build_search_interface_url(c=collection_id,
                                                     aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES),
                                                     ln=ln)

        # Build example of queries for this collection
        example_search_queries_links = [create_html_link(self.build_search_url(p=example_query,
                                                                               ln=ln,
                                                                               aas= -1,
                                                                               c=collection_id),
                                                         {},
                                                         cgi.escape(example_query),
                                                         {'class': 'examplequery'}) \
                                        for example_query in example_search_queries]
        example_query_html = ''
        if len(example_search_queries) > 0:
            example_query_link = example_search_queries_links[0]

            # offers more examples if possible
            more = ''
            if len(example_search_queries_links) > 1:
                more = '''
                <script type="text/javascript">
                function toggle_more_example_queries_visibility(){
                    var more = document.getElementById('more_example_queries');
                    var link = document.getElementById('link_example_queries');
                    var sep = document.getElementById('more_example_sep');
                    if (more.style.display=='none'){
                        more.style.display = '';
                        link.innerHTML = "%(show_less)s"
                        link.style.color = "rgb(204,0,0)";
                        sep.style.display = 'none';
                    } else {
                        more.style.display = 'none';
                        link.innerHTML = "%(show_more)s"
                        link.style.color = "rgb(0,0,204)";
                        sep.style.display = '';
                    }
                    return false;
                }
                </script>
                <span id="more_example_queries" style="display:none;text-align:right"><br/>%(more_example_queries)s<br/></span>
                <a id="link_example_queries" href="#" onclick="toggle_more_example_queries_visibility()" style="display:none"></a>
                <script type="text/javascript">
                    var link = document.getElementById('link_example_queries');
                    var sep = document.getElementById('more_example_sep');
                    link.style.display = '';
                    link.innerHTML = "%(show_more)s";
                    sep.style.display = '';
                </script>
                ''' % {'more_example_queries': '<br/>'.join(example_search_queries_links[1:]),
                       'show_less':_("less"), 'show_more':_("more")}

            example_query_html += '''<p style="text-align:right;margin:0px;">
            %(example)s<span id="more_example_sep" style="display:none;"> :: </span>%(more)s
            </p>
            ''' % {'example': _("Example: %(x_sample_search_query)s") % \
                   {'x_sample_search_query': example_query_link},
                   'more': more}

        # display options to search in current collection or everywhere
        search_in = ''
        if collection_name != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME):
            search_in += '''
           <input type="radio" name="cc" value="%(collection_id)s" id="searchCollection" checked="checked"/>
           <label for="searchCollection">%(search_in_collection_name)s</label>
           <input type="radio" name="cc" value="%(root_collection_name)s" id="searchEverywhere" />
           <label for="searchEverywhere">%(search_everywhere)s</label>
           ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \
                  {'x_collection_name': collection_name},
                  'collection_id': collection_id,
                  'root_collection_name': CFG_SITE_NAME,
                  'search_everywhere': _("Search everywhere")}

        # print commentary start:
        out += '''
        <table class="searchbox lightsearch">
         <tbody>
          <tr valign="baseline">
           <td class="searchboxbody" align="right"><input type="text" name="p" size="%(sizepattern)d" value="" class="lightsearchfield"/><br/>
             <small><small>%(example_query_html)s</small></small>
           </td>
           <td class="searchboxbody" align="left">
             <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" />
           </td>
           <td class="searchboxbody" align="left" rowspan="2" valign="top">
             <small><small>
             <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a><br/>
             %(asearch)s
             </small></small>
           </td>
          </tr></table>
          <!--<tr valign="baseline">
           <td class="searchboxbody" colspan="2" align="left">
             <small>
               --><small>%(search_in)s</small><!--
             </small>
           </td>
          </tr>
         </tbody>
        </table>-->
        <!--/create_searchfor_light()-->
        ''' % {'ln' : ln,
               'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH,
               'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '',
               'siteurl' : CFG_SITE_URL,
               'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')),
               'header' : header,
               'msg_search' : _('Search'),
               'msg_browse' : _('Browse'),
               'msg_search_tips' : _('Search Tips'),
               'search_in': search_in,
               'example_query_html': example_query_html}

        return out

    def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option):
        """Produces simple *Search for* box for the current collection.

        Parameters:

          - 'ln' *string* - *str* The language to display

          - 'collection_id' - *str* The collection id

          - 'collection_name' - *str* The collection name in current language

          - 'record_count' - *str* Number of records in this collection

          - 'middle_option' *string* - HTML code for the options (any field, specific fields ...)
""" # load the right message language _ = gettext_set_language(ln) out = ''' <!--create_searchfor_simple()--> ''' argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # print commentary start: out += ''' <table class="searchbox simplesearch"> <thead> <tr align="left"> <th colspan="3" class="searchboxheader">%(header)s</th> </tr> </thead> <tbody> <tr valign="baseline"> <td class="searchboxbody" align="left"><input type="text" name="p" size="%(sizepattern)d" value="" class="simplesearchfield"/></td> <td class="searchboxbody" align="left">%(middle_option)s</td> <td class="searchboxbody" align="left"> <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" /> <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td> </tr> <tr valign="baseline"> <td class="searchboxbody" colspan="3" align="right"> <small> <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> :: %(asearch)s </small> </td> </tr> </tbody> </table> <!--/create_searchfor_simple()--> ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'middle_option' : middle_option, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips')} return out def tmpl_searchfor_advanced(self, ln, # current language collection_id, collection_name, record_count, middle_option_1, middle_option_2, middle_option_3, searchoptions, 
sortoptions, rankoptions, displayoptions, formatoptions ): """ Produces advanced *Search for* box for the current collection. Parameters: - 'ln' *string* - The language to display - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...) - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...) - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...) - 'searchoptions' *string* - HTML code for the search options - 'sortoptions' *string* - HTML code for the sort options - 'rankoptions' *string* - HTML code for the rank options - 'displayoptions' *string* - HTML code for the display options - 'formatoptions' *string* - HTML code for the format options """ # load the right message language _ = gettext_set_language(ln) out = ''' <!--create_searchfor_advanced()--> ''' argd = drop_default_urlargd({'ln': ln, 'aas': 1, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for") % \ self.tmpl_nbrecs_info(record_count, "", "") header += ':' ssearchurl = self.build_search_interface_url(c=collection_id, aas=min(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) out += ''' <table class="searchbox advancedsearch"> <thead> <tr> <th class="searchboxheader" colspan="3">%(header)s</th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody" style="white-space: nowrap;"> %(matchbox_m1)s<input type="text" name="p1" size="%(sizepattern)d" value="" class="advancedsearchfield"/> </td> <td class="searchboxbody" style="white-space: nowrap;">%(middle_option_1)s</td> <td class="searchboxbody">%(andornot_op1)s</td> </tr> <tr valign="bottom"> <td class="searchboxbody" style="white-space: nowrap;"> %(matchbox_m2)s<input type="text" name="p2" 
size="%(sizepattern)d" value="" class="advancedsearchfield"/> </td> <td class="searchboxbody">%(middle_option_2)s</td> <td class="searchboxbody">%(andornot_op2)s</td> </tr> <tr valign="bottom"> <td class="searchboxbody" style="white-space: nowrap;"> %(matchbox_m3)s<input type="text" name="p3" size="%(sizepattern)d" value="" class="advancedsearchfield"/> </td> <td class="searchboxbody">%(middle_option_3)s</td> <td class="searchboxbody" style="white-space: nowrap;"> <input class="formbutton" type="submit" name="action_search" value="%(msg_search)s" /> <input class="formbutton" type="submit" name="action_browse" value="%(msg_browse)s" /></td> </tr> <tr valign="bottom"> <td colspan="3" class="searchboxbody" align="right"> <small> <a href="%(siteurl)s/help/search-tips%(langlink)s">%(msg_search_tips)s</a> :: %(ssearch)s </small> </td> </tr> </tbody> </table> <!-- @todo - more imports --> ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")), 'header' : header, 'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln), 'middle_option_1' : middle_option_1, 'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln), 'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln), 'middle_option_2' : middle_option_2, 'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln), 'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln), 'middle_option_3' : middle_option_3, 'msg_search' : _("Search"), 'msg_browse' : _("Browse"), 'msg_search_tips' : _("Search Tips")} if (searchoptions): out += """<table class="searchbox"> <thead> <tr> <th class="searchboxheader"> %(searchheader)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody">%(searchoptions)s</td> </tr> </tbody> </table>""" % { 'searchheader' : _("Search options:"), 'searchoptions' : searchoptions } out += """<table class="searchbox"> <thead> <tr> <th 
class="searchboxheader"> %(added)s </th> <th class="searchboxheader"> %(until)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody">%(added_or_modified)s %(date_added)s</td> <td class="searchboxbody">%(date_until)s</td> </tr> </tbody> </table> <table class="searchbox"> <thead> <tr> <th class="searchboxheader"> %(msg_sort)s </th> <th class="searchboxheader"> %(msg_display)s </th> <th class="searchboxheader"> %(msg_format)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody">%(sortoptions)s %(rankoptions)s</td> <td class="searchboxbody">%(displayoptions)s</td> <td class="searchboxbody">%(formatoptions)s</td> </tr> </tbody> </table> <!--/create_searchfor_advanced()--> """ % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(ln=ln), 'date_added' : self.tmpl_inputdate("d1", ln=ln), 'date_until' : self.tmpl_inputdate("d2", ln=ln), 'msg_sort' : _("Sort by:"), 'msg_display' : _("Display results:"), 'msg_format' : _("Output format:"), 'sortoptions' : sortoptions, 'rankoptions' : rankoptions, 'displayoptions' : displayoptions, 'formatoptions' : formatoptions } return out def tmpl_matchtype_box(self, name='m', value='', ln='en'): """Returns HTML code for the 'match type' selection box. 
        Parameters:

          - 'name' *string* - The name of the produced select

          - 'value' *string* - The selected value (if any value is already selected)

          - 'ln' *string* - the language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        out = """
        <select name="%(name)s">
        <option value="a"%(sela)s>%(opta)s</option>
        <option value="o"%(selo)s>%(opto)s</option>
        <option value="e"%(sele)s>%(opte)s</option>
        <option value="p"%(selp)s>%(optp)s</option>
        <option value="r"%(selr)s>%(optr)s</option>
        </select>
        """ % {'name' : name,
               'sela' : self.tmpl_is_selected('a', value),
               'opta' : _("All of the words:"),
               'selo' : self.tmpl_is_selected('o', value),
               'opto' : _("Any of the words:"),
               'sele' : self.tmpl_is_selected('e', value),
               'opte' : _("Exact phrase:"),
               'selp' : self.tmpl_is_selected('p', value),
               'optp' : _("Partial phrase:"),
               'selr' : self.tmpl_is_selected('r', value),
               'optr' : _("Regular expression:")
              }
        return out

    def tmpl_is_selected(self, var, fld):
        """
          Checks if *var* and *fld* are equal, and if yes, returns
          ' selected="selected"'.  Useful for select boxes.

        Parameters:

          - 'var' *string* - First value to compare

          - 'fld' *string* - Second value to compare
        """
        if var == fld:
            return ' selected="selected"'
        else:
            return ""

    def tmpl_andornot_box(self, name='op', value='', ln='en'):
        """
          Returns HTML code for the AND/OR/NOT selection box.

        Parameters:

          - 'name' *string* - The name of the produced select

          - 'value' *string* - The selected value (if any value is already selected)

          - 'ln' *string* - the language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        out = """
        <select name="%(name)s">
        <option value="a"%(sela)s>%(opta)s</option>
        <option value="o"%(selo)s>%(opto)s</option>
        <option value="n"%(seln)s>%(optn)s</option>
        </select>
        """ % {'name' : name,
               'sela' : self.tmpl_is_selected('a', value), 'opta' : _("AND"),
               'selo' : self.tmpl_is_selected('o', value), 'opto' : _("OR"),
               'seln' : self.tmpl_is_selected('n', value), 'optn' : _("AND NOT")
              }
        return out

    def tmpl_inputdate(self, name, ln, sy=0, sm=0, sd=0):
        """
          Produces *From Date*, *Until Date* kind of selection box. Suitable for
          search options.

        Parameters:

          - 'name' *string* - The base name of the produced selects

          - 'ln' *string* - the language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        # day
        box = """
               <select name="%(name)sd">
                 <option value=""%(sel)s>%(any)s</option>
              """ % {
                'name' : name,
                'any' : _("any day"),
                'sel' : self.tmpl_is_selected(sd, 0)
              }
        for day in range(1, 32):
            box += """<option value="%02d"%s>%02d</option>""" % (day, self.tmpl_is_selected(sd, day), day)
        box += """</select>"""
        # month
        box += """
                <select name="%(name)sm">
                  <option value=""%(sel)s>%(any)s</option>
               """ % {
                 'name' : name,
                 'any' : _("any month"),
                 'sel' : self.tmpl_is_selected(sm, 0)
               }
        for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \
                          (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")), \
                          (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]:
            box += """<option value="%02d"%s>%s</option>""" % (mm, self.tmpl_is_selected(sm, mm), month)
        box += """</select>"""
        # year: offer the last twenty years up to the current one
        box += """
                <select name="%(name)sy">
                  <option value=""%(sel)s>%(any)s</option>
               """ % {
                 'name' : name,
                 'any' : _("any year"),
                 'sel' : self.tmpl_is_selected(sy, 0)
               }
        this_year = int(time.strftime("%Y", time.localtime()))
        for year in range(this_year - 20, this_year + 1):
            box += """<option value="%d"%s>%d</option>""" % (year, self.tmpl_is_selected(sy, year), year)
        box += """</select>"""
        return box

    def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG):
        """
          Produces input date type selection box to choose
          added-or-modified date search option.

        Parameters:

          - 'dt' *string* - date type (c=created, m=modified)

          - 'ln' *string* - the language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        box = """<select name="dt">
                  <option value="">%(added)s </option>
                  <option value="m"%(sel)s>%(modified)s </option>
                 </select>
              """ % { 'added': _("Added since:"),
                      'modified': _("Modified since:"),
                      'sel': self.tmpl_is_selected(dt, 'm'),
                    }
        return box

    def tmpl_narrowsearch(self, aas, ln, type, father,
                          has_grandchildren, sons, display_grandsons,
                          grandsons):
        """
        Creates list of collection descendants of type *type* under title *title*.
        If aas==1, then links to Advanced Search interfaces; otherwise Simple Search.
        Suitable for 'Narrow search' and 'Focus on' boxes.

        Parameters:

          - 'aas' *bool* - Should we display an advanced search box?
          - 'ln' *string* - The language to display

          - 'type' *string* - The type of the produced box (virtual collections or normal collections)

          - 'father' *collection* - The current collection

          - 'has_grandchildren' *bool* - If the current collection has grand children

          - 'sons' *list* - The list of the sub-collections (first level)

          - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display)

          - 'grandsons' *list* - The list of sub-collections (second level)
        """
        # load the right message language
        _ = gettext_set_language(ln)

        title = {'r': _("Narrow by collection:"),
                 'v': _("Focus on:")}[type]

        if has_grandchildren:
            style_prolog = "<strong>"
            style_epilog = "</strong>"
        else:
            style_prolog = ""
            style_epilog = ""

        out = """<table class="%(narrowsearchbox)s">
                   <thead>
                    <tr>
                     <th colspan="2" align="left" class="%(narrowsearchbox)sheader">
                      %(title)s
                     </th>
                    </tr>
                   </thead>
                   <tbody>""" % {'title' : title,
                                 'narrowsearchbox': {'r': 'narrowsearchbox',
                                                     'v': 'focusonsearchbox'}[type]}
        # iterate through sons:
        i = 0
        for son in sons:
            out += """<tr><td class="%(narrowsearchbox)sbody" valign="top">""" % \
                   { 'narrowsearchbox': {'r': 'narrowsearchbox',
                                         'v': 'focusonsearchbox'}[type]}
            if type == 'r':
                if son.restricted_p() and son.restricted_p() != father.restricted_p():
                    out += """<input type="checkbox" name="c" value="%(name)s" /></td>""" % {'name' : cgi.escape(son.name) }
                # hosted collections are checked by default only when configured so
                elif str(son.dbquery).startswith("hostedcollection:"):
                    external_collection_engine = get_external_collection_engine(str(son.name))
                    if external_collection_engine and external_collection_engine.selected_by_default:
                        out += """<input type="checkbox" name="c" value="%(name)s" checked="checked" /></td>""" % {'name' : cgi.escape(son.name) }
                    elif external_collection_engine and not external_collection_engine.selected_by_default:
                        out += """<input type="checkbox" name="c" value="%(name)s" /></td>""" % {'name' : cgi.escape(son.name) }
                    else:
                        # strangely, the external collection engine was never found. In that case,
                        # why was the hosted collection here in the first place?
                        out += """<input type="checkbox" name="c" value="%(name)s" /></td>""" % {'name' : cgi.escape(son.name) }
                else:
                    out += """<input type="checkbox" name="c" value="%(name)s" checked="checked" /></td>""" % {'name' : cgi.escape(son.name) }
            else:
                out += '</td>'
            out += """<td valign="top">%(link)s%(recs)s """ % {
                'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, aas=aas),
                                         {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog),
                'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)}

            # the following prints the "external collection" arrow just after the name and
            # number of records of the hosted collection
            # 1) we might want to make the arrow work as an anchor to the hosted collection as well.
            #    That would probably require a new separate function under invenio.urlutils
            # 2) we might want to place the arrow between the name and the number of records of the hosted collection
            #    That would require to edit/separate the above out += ...
            if type == 'r':
                if str(son.dbquery).startswith("hostedcollection:"):
                    out += """<img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/>""" % \
                           { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(son.name), }

            if son.restricted_p():
                out += """ <small class="warning">[%(msg)s]</small> """ % { 'msg' : _("restricted") }
            if display_grandsons and len(grandsons[i]):
                # iterate trough grandsons:
                out += """<br />"""
                for grandson in grandsons[i]:
                    out += """ <small>%(link)s%(nbrec)s</small> """ % {
                        'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, aas=aas),
                                                 {},
                                                 cgi.escape(grandson.get_name(ln))),
                        'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)}
                    # the following prints the "external collection" arrow just after the name and
                    # number of records of the hosted collection
                    # Some relatives comments have been made just above
                    if type == 'r':
                        if str(grandson.dbquery).startswith("hostedcollection:"):
                            out += """<img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/>""" % \
                                   { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(grandson.name), }

            out += """</td></tr>"""
            i += 1
        out += "</tbody></table>"

        return out

    def tmpl_searchalso(self, ln, engines_list, collection_id):
        # Builds the "Search also:" box listing the external search engines
        # configured for this collection.
        _ = gettext_set_language(ln)

        box_name = _("Search also:")

        html = """<table cellspacing="0" cellpadding="0" border="0">
            <tr><td valign="top"><table class="searchalsosearchbox">
            <thead><tr><th colspan="2" align="left" class="searchalsosearchboxheader">%(box_name)s
            </th></tr></thead><tbody>
        """ % locals()

        for engine in engines_list:
            internal_name = engine.name
            name = _(internal_name)
            base_url = engine.base_url
            # state 3 means the engine is selected by default for this collection
            if external_collection_get_state(engine, collection_id) == 3:
                checked = ' checked="checked"'
            else:
                checked = ''

            html += """<tr><td class="searchalsosearchboxbody" valign="top">
                <input type="checkbox" name="ec" id="%(id)s" value="%(internal_name)s" %(checked)s /></td>
                <td valign="top" class="searchalsosearchboxbody">
                <div style="white-space: nowrap"><label for="%(id)s">%(name)s</label>
                <a href="%(base_url)s">
                <img src="%(siteurl)s/img/external-icon-light-8x8.gif" border="0" alt="%(name)s"/></a>
                </div></td></tr>""" % \
                                 { 'checked': checked,
                                   'base_url': base_url,
                                   'internal_name': internal_name,
                                   'name': cgi.escape(name),
                                   'id': "extSearch" + nmtoken_from_string(name),
                                   'siteurl': CFG_SITE_URL, }

        html += """</tbody></table></td></tr></table>"""
        return html

    def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG):
        """
        Return information on the number of records.

        Parameters:

        - 'number' *string* - The number of records

        - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be
        '<small class="nbdoccoll">(')

        - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be
        ')</small>')
        """
        if number is None:
            number = 0
        if prolog is None:
            prolog = '''&nbsp;<small class="nbdoccoll">('''
        if epilog is None:
            epilog = ''')</small>'''

        return prolog + self.tmpl_nice_number(number, ln) + epilog

    def tmpl_box_restricted_content(self, ln):
        """
          Displays a box containing a *restricted content* message

        Parameters:

          - 'ln' *string* - The language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        return _("This collection is restricted.  If you are authorized to access it, please click on the Search button.")

    def tmpl_box_hosted_collection(self, ln):
        """
          Displays a box containing a *hosted collection* message

        Parameters:

          - 'ln' *string* - The language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        return _("This is a hosted external collection. Please click on the Search button to see its content.")

    def tmpl_box_no_records(self, ln):
        """
          Displays a box containing a *no content* message

        Parameters:

          - 'ln' *string* - The language to display
        """
        # load the right message language
        _ = gettext_set_language(ln)

        return _("This collection does not contain any document yet.")

    def tmpl_instant_browse(self, aas, ln, recids, more_link=None):
        """
          Formats a list of records (given in the recids list) from the database.

        Parameters:

          - 'aas' *int* - Advanced Search interface or not (0 or 1)

          - 'ln' *string* - The language to display

          - 'recids' *list* - the list of records from the database

          - 'more_link' *string* - the "More..." link for the record. If not given, will not be displayed
        """
        # load the right message language
        _ = gettext_set_language(ln)

        body = '''<table class="latestadditionsbox">'''
        for recid in recids:
            body += '''
            <tr>
              <td class="latestadditionsboxtimebody">%(date)s</td>
              <td class="latestadditionsboxrecordbody">
                <abbr class="unapi-id" title="%(recid)s"></abbr>
                %(body)s
              </td>
            </tr>''' % {
                        'recid': recid['id'],
                        'date': recid['date'],
                        'body': recid['body']
                      }
        body += "</table>"
        if more_link:
            body += '<div align="right"><small>' + \
                    create_html_link(more_link, {}, '[>> %s]' % _("more")) + \
                    '</small></div>'
        return '''
        <table class="narrowsearchbox">
          <thead>
            <tr>
              <th class="narrowsearchboxheader">%(header)s</th>
            </tr>
          </thead>
          <tbody>
            <tr>
              <td class="narrowsearchboxbody">%(body)s</td>
            </tr>
          </tbody>
        </table>''' % {'header' : _("Latest additions:"),
                       'body' : body,
                       }

    def tmpl_searchwithin_select(self, ln, fieldname, selected, values):
        """
          Produces 'search within' selection box for the current collection.
Parameters: - 'ln' *string* - The language to display - 'fieldname' *string* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ out = '<select name="%(fieldname)s">' % {'fieldname': fieldname} if values: for pair in values: out += """<option value="%(value)s"%(selected)s>%(text)s</option>""" % { 'value' : cgi.escape(pair['value']), 'selected' : self.tmpl_is_selected(pair['value'], selected), 'text' : cgi.escape(pair['text']) } out += """</select>""" return out def tmpl_select(self, fieldname, values, selected=None, css_class=''): """ Produces a generic select box Parameters: - 'css_class' *string* - optional, a css class to display this select with - 'fieldname' *list* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ if css_class != '': class_field = ' class="%s"' % css_class else: class_field = '' out = '<select name="%(fieldname)s"%(class)s>' % { 'fieldname' : fieldname, 'class' : class_field } for pair in values: if pair.get('selected', False) or pair['value'] == selected: flag = ' selected="selected"' else: flag = '' out += '<option value="%(value)s"%(selected)s>%(text)s</option>' % { 'value' : cgi.escape(str(pair['value'])), 'selected' : flag, 'text' : cgi.escape(pair['text']) } out += """</select>""" return out def tmpl_record_links(self, recid, ln, sf='', so='d', sp='', rm=''): """ Displays the *More info* and *Find similar* links for a record Parameters: - 'ln' *string* - The language to display - 'recid' *string* - the id of the displayed record """ # load the right message language _ = gettext_set_language(ln) out = '''<br /><span class="moreinfo">%(detailed)s - %(similar)s</span>''' % { 'detailed': create_html_link(self.build_search_url(recid=recid, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}), 'similar': 
create_html_link(self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln), {}, _("Similar records"), {'class': "moreinfo"})} if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recid) if num_timescited: out += '''<span class="moreinfo"> - %s </span>''' % \ create_html_link(self.build_search_url(p='refersto:recid:%d' % recid, sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"}) return out def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln): """ Displays the "HTML basic" format of a record Parameters: - 'authors' *list* - the authors (as strings) - 'dates' *list* - the dates of publication - 'rns' *list* - the quicknotes for the record - 'abstracts' *list* - the abstracts for the record - 'urls_u' *list* - URLs to the original versions of the record - 'urls_z' *list* - Not used """ out = "" for title in titles: out += "<strong>%(title)s</strong> " % { 'title' : cgi.escape(title) } if authors: out += " / " for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]: out += '%s ' % \ create_html_link(self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author)) if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD: out += "<em>et al</em>" for date in dates: out += " %s." 
% cgi.escape(date) for rn in rns: out += """ <small class="quicknote">[%(rn)s]</small>""" % {'rn' : cgi.escape(rn)} for abstract in abstracts: out += "<br /><small>%(abstract)s [...]</small>" % {'abstract' : cgi.escape(abstract[:1 + string.find(abstract, '.')]) } for idx in range(0, len(urls_u)): out += """<br /><small class="note"><a class="note" href="%(url)s">%(name)s</a></small>""" % { 'url' : urls_u[idx], 'name' : urls_u[idx] } return out def tmpl_search_in_bibwords(self, p, f, ln, nearest_box): """ Displays the *Words like current ones* links for a search Parameters: - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call """ # load the right message language _ = gettext_set_language(ln) out = '<p>' if f: out += _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {'x_word': '<em>' + cgi.escape(p) + '</em>', 'x_field': '<em>' + cgi.escape(f) + '</em>'} else: out += _("Words nearest to %(x_word)s in any collection are:") % {'x_word': '<em>' + cgi.escape(p) + '</em>'} out += '<br />' + nearest_box + '</p>' return out def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro): """ Displays the *Nearest search terms* box Parameters: - 'p' *string* - Current search words - 'f' *string* - a collection description (if the search has been completed in a collection) - 'ln' *string* - The language to display - 'terminfo': tuple (term, hits, argd) for each near term - 'intro' *string* - the intro HTML to prefix the box with """ out = '''<table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">''' for term, hits, argd in terminfo: if hits: hitsinfo = str(hits) else: hitsinfo = '-' term = cgi.escape(term) if term == p: # print search word for orientation: nearesttermsboxbody_class = "nearesttermsboxbodyselected" if hits > 0: term = 
create_html_link(self.build_search_url(argd), {}, term, {'class': "nearesttermsselected"}) else: nearesttermsboxbody_class = "nearesttermsboxbody" term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearestterms"}) out += '''\ <tr> <td class="%(nearesttermsboxbody_class)s" align="right">%(hits)s</td> <td class="%(nearesttermsboxbody_class)s" width="15"> </td> <td class="%(nearesttermsboxbody_class)s" align="left">%(term)s</td> </tr> ''' % {'hits': hitsinfo, 'nearesttermsboxbody_class': nearesttermsboxbody_class, 'term': term} out += "</table>" return intro + "<blockquote>" + out + "</blockquote>" def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls, rg): """ Displays the *Nearest search terms* box Parameters: - 'f' *string* - field (*not* i18nized) - 'fn' *string* - field name (i18nized) - 'ln' *string* - The language to display - 'browsed_phrases_in_colls' *array* - the phrases to display - 'colls' *array* - the list of collection parameters of the search (c's) - 'rg' *int* - the number of records """ # load the right message language _ = gettext_set_language(ln) out = """<table class="searchresultsbox"> <thead> <tr> <th class="searchresultsboxheader" style="text-align: right;" width="15"> %(hits)s </th> <th class="searchresultsboxheader" width="15"> </th> <th class="searchresultsboxheader" style="text-align: left;"> %(fn)s </th> </tr> </thead> <tbody>""" % { 'hits' : _("Hits"), 'fn' : cgi.escape(fn) } if len(browsed_phrases_in_colls) == 1: # one hit only found: phrase, nbhits = browsed_phrases_in_colls[0][0], browsed_phrases_in_colls[0][1] query = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """<tr> <td class="searchresultsboxbody" style="text-align: right;"> %(nbhits)s </td> <td class="searchresultsboxbody" width="15"> </td> <td class="searchresultsboxbody" style="text-align: left;"> %(link)s </td> </tr>""" % {'nbhits': nbhits, 'link': 
                                  create_html_link(self.build_search_url(query),
                                                   {}, cgi.escape(phrase))}

        elif len(browsed_phrases_in_colls) > 1:
            # first display what was found but the last one:
            for phrase, nbhits in browsed_phrases_in_colls[:-1]:
                query = {'c': colls,
                         'ln': ln,
                         'p': '"%s"' % phrase.replace('"', '\\"'),
                         'f': f,
                         'rg' : rg}
                out += """<tr>
                           <td class="searchresultsboxbody" style="text-align: right;">
                            %(nbhits)s
                           </td>
                           <td class="searchresultsboxbody" width="15">
                            
                           </td>
                           <td class="searchresultsboxbody" style="text-align: left;">
                            %(link)s
                           </td>
                          </tr>""" % {'nbhits' : nbhits,
                                      'link': create_html_link(self.build_search_url(query),
                                                               {},
                                                               cgi.escape(phrase))}

            # now display last hit as "previous term":
            phrase, nbhits = browsed_phrases_in_colls[0]
            query_previous = {'c': colls,
                              'ln': ln,
                              'p': '"%s"' % phrase.replace('"', '\\"'),
                              'f': f,
                              'rg' : rg}
            # now display last hit as "next term":
            phrase, nbhits = browsed_phrases_in_colls[-1]
            query_next = {'c': colls,
                          'ln': ln,
                          'p': '"%s"' % phrase.replace('"', '\\"'),
                          'f': f,
                          'rg' : rg}
            out += """<tr><td colspan="2" class="normal">
                            
                          </td>
                          <td class="normal">
                            %(link_previous)s
                            <img src="%(siteurl)s/img/sp.gif" alt="" border="0" />
                            <img src="%(siteurl)s/img/sn.gif" alt="" border="0" />
                            %(link_next)s
                          </td>
                      </tr>""" % {'link_previous': create_html_link(self.build_search_url(query_previous, action='browse'), {}, _("Previous")),
                                  'link_next': create_html_link(self.build_search_url(query_next, action='browse'), {}, _("next")),
                                  'siteurl' : CFG_SITE_URL}
        out += """</tbody>
            </table>"""
        return out

    def tmpl_search_box(self, ln, aas, cc, cc_intl, ot, sp,
                        action, fieldslist, f1, f2, f3, m1, m2, m3,
                        p1, p2, p3, op1, op2, rm, p, f, coll_selects,
                        d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields,
                        sf, so, ranks, sc, rg, formats, of, pl, jrec, ec,
                        show_colls=True, show_title=True):
        """
          Displays the *Nearest search terms* box

        Parameters:

          - 'ln' *string* - The language to display

          - 'aas' *bool* - Should we display an advanced search box? -1 -> 1, from simpler to more advanced

          - 'cc_intl' *string* - the i18nized current collection name, used for display

          - 'cc' *string* - the internal current collection name

          - 'ot', 'sp' *string* - hidden values

          - 'action' *string* - the action demanded by the user

          - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search

          - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, op3, rm' *strings* - the search parameters

          - 'coll_selects' *array* - a list of lists, each containing the collections selects to display

          - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates

          - 'dt' *string* - the dates' types (creation dates, modification dates)

          - 'sort_fields' *array* - the select information for the sort fields

          - 'sf' *string* - the currently selected sort field

          - 'so' *string* - the currently selected sort order ("a" or "d")

          - 'ranks' *array* - ranking methods

          - 'rm' *string* - selected ranking method

          - 'sc' *string* - split by collection or not

          - 'rg' *string* - selected results/page

          - 'formats' *array* - available output formats

          - 'of' *string* - the selected output format

          - 'pl' *string* - `limit to' search pattern

          - show_colls *bool* - propose coll selection box?

          - show_title *bool* show cc_intl in page title?
        """
        # load the right message language
        _ = gettext_set_language(ln)

        # These are hidden fields the user does not manipulate
        # directly
        if aas == -1:
            argd = drop_default_urlargd({
                'ln': ln, 'aas': aas,
                'ot': ot, 'sp': sp, 'ec': ec,
                }, self.search_results_default_urlargd)
        else:
            argd = drop_default_urlargd({
                'cc': cc, 'ln': ln, 'aas': aas,
                'ot': ot, 'sp': sp, 'ec': ec,
                }, self.search_results_default_urlargd)

        out = ""
        if show_title:
            # display cc name if asked for
            out += '''
            <h1 class="headline">%(ccname)s</h1>''' % {'ccname' : cgi.escape(cc_intl), }

        out += '''
        <form name="search" action="%(siteurl)s/search" method="get">
        ''' % {'siteurl' : CFG_SITE_URL}

        # Only add non-default hidden values
        for field, value in argd.items():
            out += self.tmpl_input_hidden(field, value)

        leadingtext = _("Search")

        if action == 'browse':
            leadingtext = _("Browse")

        if aas == 1:
            # print Advanced Search form:

            # define search box elements:
            out += '''
            <table class="searchbox advancedsearch">
             <thead>
              <tr>
               <th colspan="3" class="searchboxheader">
                %(leading)s:
               </th>
              </tr>
             </thead>
             <tbody>
              <tr valign="top" style="white-space:nowrap;">
                <td class="searchboxbody">%(matchbox1)s
                  <input type="text" name="p1" size="%(sizepattern)d" value="%(p1)s" class="advancedsearchfield"/>
                </td>
                <td class="searchboxbody">%(searchwithin1)s</td>
                <td class="searchboxbody">%(andornot1)s</td>
              </tr>
              <tr valign="top">
                <td class="searchboxbody">%(matchbox2)s
                  <input type="text" name="p2" size="%(sizepattern)d" value="%(p2)s" class="advancedsearchfield"/>
                </td>
                <td class="searchboxbody">%(searchwithin2)s</td>
                <td class="searchboxbody">%(andornot2)s</td>
              </tr>
              <tr valign="top">
                <td class="searchboxbody">%(matchbox3)s
                  <input type="text" name="p3" size="%(sizepattern)d" value="%(p3)s" class="advancedsearchfield"/>
                </td>
                <td class="searchboxbody">%(searchwithin3)s</td>
                <td class="searchboxbody" style="white-space:nowrap;">
                  <input class="formbutton" type="submit" name="action_search" value="%(search)s" />
                  <input class="formbutton"
type="submit" name="action_browse" value="%(browse)s" /> </td> </tr> <tr valign="bottom"> <td colspan="3" align="right" class="searchboxbody"> <small> <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> :: %(simple_search)s </small> </td> </tr> </tbody> </table> ''' % { 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg), {}, _("Simple Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln), 'p1' : cgi.escape(p1, 1), 'searchwithin1' : self.tmpl_searchwithin_select( ln=ln, fieldname='f1', selected=f1, values=self._add_mark_to_field(value=f1, fields=fieldslist, ln=ln) ), 'andornot1' : self.tmpl_andornot_box( name='op1', value=op1, ln=ln ), 'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln), 'p2' : cgi.escape(p2, 1), 'searchwithin2' : self.tmpl_searchwithin_select( ln=ln, fieldname='f2', selected=f2, values=self._add_mark_to_field(value=f2, fields=fieldslist, ln=ln) ), 'andornot2' : self.tmpl_andornot_box( name='op2', value=op2, ln=ln ), 'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln), 'p3' : cgi.escape(p3, 1), 'searchwithin3' : self.tmpl_searchwithin_select( ln=ln, fieldname='f3', selected=f3, values=self._add_mark_to_field(value=f3, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } elif aas == 0: # print Simple Search form: out += ''' <table class="searchbox simplesearch"> <thead> <tr> <th colspan="3" class="searchboxheader"> %(leading)s: </th> </tr> </thead> <tbody> <tr valign="top"> <td class="searchboxbody"><input type="text" name="p" size="%(sizepattern)d" value="%(p)s" class="simplesearchfield"/></td> <td class="searchboxbody">%(searchwithin)s</td> <td class="searchboxbody"> <input class="formbutton" type="submit" 
name="action_search" value="%(search)s" /> <input class="formbutton" type="submit" name="action_browse" value="%(browse)s" /> </td> </tr> <tr valign="bottom"> <td colspan="3" align="right" class="searchboxbody"> <small> <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a> :: %(advanced_search)s </small> </td> </tr> </tbody> </table> ''' % { 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln=ln, fieldname='f', selected=f, values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } else: # EXPERIMENTAL # print light search form: search_in = '' if cc_intl != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in = ''' <input type="radio" name="cc" value="%(collection_id)s" id="searchCollection" checked="checked"/> <label for="searchCollection">%(search_in_collection_name)s</label> <input type="radio" name="cc" value="%(root_collection_name)s" id="searchEverywhere" /> <label for="searchEverywhere">%(search_everywhere)s</label> ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': cgi.escape(cc_intl)}, 'collection_id': cc, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} out += ''' <table class="searchbox lightsearch"> <tr valign="top"> <td class="searchboxbody"><input type="text" name="p" size="%(sizepattern)d" value="%(p)s" class="lightsearchfield"/></td> <td class="searchboxbody"> <input class="formbutton" type="submit" name="action_search" value="%(search)s" /> </td> <td class="searchboxbody" 
align="left" rowspan="2" valign="top"> <small><small> <a href="%(siteurl)s/help/search-tips%(langlink)s">%(search_tips)s</a><br/> %(advanced_search)s </td> </tr> </table> <small>%(search_in)s</small> ''' % { 'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln=ln, fieldname='f', selected=f, values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips"), 'search_in': search_in } ## secondly, print Collection(s) box: if show_colls and aas > -1: # display collections only if there is more than one selects = '' for sel in coll_selects: selects += self.tmpl_select(fieldname='c', values=sel) out += """ <table class="searchbox"> <thead> <tr> <th colspan="3" class="searchboxheader"> %(leading)s %(msg_coll)s: </th> </tr> </thead> <tbody> <tr valign="bottom"> <td valign="top" class="searchboxbody"> %(colls)s </td> </tr> </tbody> </table> """ % { 'leading' : leadingtext, 'msg_coll' : _("collections"), 'colls' : selects, } ## thirdly, print search limits, if applicable: if action != _("Browse") and pl: out += """<table class="searchbox"> <thead> <tr> <th class="searchboxheader"> %(limitto)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody"> <input type="text" name="pl" size="%(sizepattern)d" value="%(pl)s" /> </td> </tr> </tbody> </table>""" % { 'limitto' : _("Limit to:"), 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'pl' : cgi.escape(pl, 1), } ## fourthly, print from/until date boxen, if applicable: if action == _("Browse") or (d1y == 0 and d1m == 0 and d1d == 0 and 
d2y == 0 and d2m == 0 and d2d == 0): pass # do not need it else: cell_6_a = self.tmpl_inputdatetype(dt, ln) + self.tmpl_inputdate("d1", ln, d1y, d1m, d1d) cell_6_b = self.tmpl_inputdate("d2", ln, d2y, d2m, d2d) out += """<table class="searchbox"> <thead> <tr> <th class="searchboxheader"> %(added)s </th> <th class="searchboxheader"> %(until)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td class="searchboxbody">%(added_or_modified)s %(date1)s</td> <td class="searchboxbody">%(date2)s</td> </tr> </tbody> </table>""" % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(dt, ln), 'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d), 'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d), } ## fifthly, print Display results box, including sort/rank, formats, etc: if action != _("Browse") and aas > -1: rgs = [] for i in [10, 25, 50, 100, 250, 500]: if i <= CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS: rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))}) # enrich sort fields list if we are sorting by some MARC tag: sort_fields = self._add_mark_to_field(value=sf, fields=sort_fields, ln=ln) # create sort by HTML box: out += """<table class="searchbox"> <thead> <tr> <th class="searchboxheader"> %(sort_by)s </th> <th class="searchboxheader"> %(display_res)s </th> <th class="searchboxheader"> %(out_format)s </th> </tr> </thead> <tbody> <tr valign="bottom"> <td valign="top" class="searchboxbody"> %(select_sf)s %(select_so)s %(select_rm)s </td> <td valign="top" class="searchboxbody"> %(select_rg)s %(select_sc)s </td> <td valign="top" class="searchboxbody">%(select_of)s</td> </tr> </tbody> </table>""" % { 'sort_by' : _("Sort by:"), 'display_res' : _("Display results:"), 'out_format' : _("Output format:"), 'select_sf' : self.tmpl_select(fieldname='sf', values=sort_fields, selected=sf, css_class='address'), 'select_so' : self.tmpl_select(fieldname='so', values=[{ 'value' : 'a', 'text' : _("asc.") }, { 'value' : 'd', 
                                                  'text' : _("desc.")
                                                }], selected=so, css_class='address'),
          'select_rm' : self.tmpl_select(fieldname='rm',
                                         values=ranks,
                                         selected=rm,
                                         css_class='address'),
          'select_rg' : self.tmpl_select(fieldname='rg',
                                         values=rgs,
                                         selected=rg,
                                         css_class='address'),
          'select_sc' : self.tmpl_select(fieldname='sc',
                                         values=[{
                                                   'value' : 0,
                                                   'text' : _("single list")
                                                 },
                                                 {
                                                   'value' : 1,
                                                   'text' : _("split by collection")
                                                 }],
                                         selected=sc,
                                         css_class='address'),
          'select_of' : self.tmpl_select(
                          fieldname='of',
                          selected=of,
                          values=self._add_mark_to_field(value=of, fields=formats, chars=3, ln=ln),
                          css_class='address'),
        }

        ## last but not least, print end of search box:
        out += """</form>"""
        return out

    def tmpl_input_hidden(self, name, value):
        """Produce the HTML code for one hidden form field.

        If 'value' is a list, one hidden input is emitted per element
        (joined by newlines), all sharing the same field name.
        Both name and value are HTML-escaped.
        """
        if isinstance(value, list):
            # recurse: one hidden input per list element
            list_input = [self.tmpl_input_hidden(name, val) for val in value]
            return "\n".join(list_input)

        # Treat `as'/`aas' arguments specially: the public URL argument
        # is still called `as', while the code uses `aas' internally.
        if name == 'aas':
            name = 'as'

        return """<input type="hidden" name="%(name)s" value="%(value)s" />""" % {
                 'name' : cgi.escape(str(name), 1),
                 'value' : cgi.escape(str(value), 1),
               }

    def _add_mark_to_field(self, value, fields, ln, chars=1):
        """Add 'value' as a raw MARC-tag entry to the 'fields' select list.

        Used by the advanced search interface so that a user-typed MARC
        tag (a value whose first 'chars' characters are digits) shows up
        as a selectable option.

        NOTE: this appends to and returns the very list passed in as
        'fields' (no copy is made), so the caller's list is mutated.
        """
        # load the right message language
        _ = gettext_set_language(ln)
        out = fields
        if value and str(value[0:chars]).isdigit():
            out.append({'value' : value,
                        'text' : str(value) + " " + _("MARC tag")
                       })
        return out

    def tmpl_search_pagestart(self, ln) :
        "page start for search page. Will display after the page header"
        return """<div class="pagebody"><div class="pagebodystripemiddle">"""

    def tmpl_search_pageend(self, ln) :
        "page end for search page. Will display just before the page footer"
        return """</div></div>"""

    def tmpl_print_warning(self, msg, type, prologue, epilogue):
        """Prints warning message and flushes output.
Parameters: - 'msg' *string* - The message string - 'type' *string* - the warning type - 'prologue' *string* - HTML code to display before the warning - 'epilogue' *string* - HTML code to display after the warning """ out = '\n%s<span class="quicknote">' % (prologue) if type: out += '%s: ' % type out += '%s</span>%s' % (msg, epilogue) return out def tmpl_print_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available 
fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += ''' <a name="%(collection_id)s"></a> <form action="%(siteurl)s/search" method="get"> <table class="searchresultsbox"><tr><td class="searchresultsboxheader" align="left"> <strong><big>%(collection_link)s</big></strong></td> ''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln), {}, cgi.escape(collection_name)) } else: out += """ <form action="%(siteurl)s/search" method="get"><div align="center"> """ % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: out += """<td class="searchresultsboxheader" align="center"> %(recs_found)s """ % { 'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') } else: out += "<small>" if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') + " " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'aas': aas, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '<img src="%(siteurl)s/img/%(gif)s.gif" alt="%(txt)s" border="0" />' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec - rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg), {}, 
img('sp', _("previous")), {'class': 'img'}) if jrec + rg - 1 < nb_found: out += "%d - %d" % (jrec, jrec + rg - 1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec + rg: out += create_html_link(self.build_search_url(query, jrec=jrec + rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec + rg + rg: out += create_html_link(self.build_search_url(query, jrec=nb_found - rg + 1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name=var, value=vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name=var, value=vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name=fieldcode, value=val) out += """ %(jump)s <input type="text" name="jrec" size="4" value="%(jrec)d" />""" % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "</td>" else: out += "</small>" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """<td class="searchresultsboxheader" align="right"><small>%(time)s</small> </td>""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "</tr></table>" else: out += "</div>" out += "</form>" return out def tmpl_print_hosted_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. 
Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += ''' <a name="%(collection_id)s"></a> <form action="%(siteurl)s/search" method="get"> <table class="searchresultsbox"><tr><td class="searchresultsboxheader" align="left"> <strong><big>%(collection_link)s</big></strong></td> ''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln), {}, cgi.escape(collection_name)) } else: out += """ <form action="%(siteurl)s/search" method="get"><div 
align="center"> """ % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: # in case we have a hosted collection that timed out do not print its number of records, as it is yet unknown if nb_found != -963: out += """<td class="searchresultsboxheader" align="center"> %(recs_found)s """ % { 'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') } #elif nb_found = -963: # out += """<td class="searchresultsboxheader" align="center"> # %(recs_found)s """ % { # 'recs_found' : _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') # } else: out += "<small>" # we do not care about timed out hosted collections here, because the bumber of records found will never be bigger # than rg anyway, since it's negative if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('<strong>' + self.tmpl_nice_number(nb_found, ln) + '</strong>') + " " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'aas': aas, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '<img src="%(siteurl)s/img/%(gif)s.gif" alt="%(txt)s" border="0" />' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec - rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg), {}, img('sp', _("previous")), {'class': 'img'}) if jrec + rg - 1 < nb_found: out += "%d - %d" % (jrec, jrec + rg - 1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec + rg: out += 
create_html_link(self.build_search_url(query, jrec=jrec + rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec + rg + rg: out += create_html_link(self.build_search_url(query, jrec=nb_found - rg + 1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name=var, value=vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name=var, value=vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name=fieldcode, value=val) out += """ %(jump)s <input type="text" name="jrec" size="4" value="%(jrec)d" />""" % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "</td>" else: out += "</small>" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """<td class="searchresultsboxheader" align="right"><small>%(time)s</small> </td>""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "</tr></table>" else: out += "</div>" out += "</form>" return out def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None): """ Return nicely printed number NUMBER in language LN using given THOUSANDS_SEPARATOR character. If max_ndigits_after_dot is specified and the number is float, the number is rounded by taking in consideration up to max_ndigits_after_dot digit after the dot. This version does not pay attention to locale. See tmpl_nice_number_via_locale(). 
""" if type(number) is float: if max_ndigits_after_dot is not None: number = round(number, max_ndigits_after_dot) int_part, frac_part = str(number).split('.') return '%s.%s' % (self.tmpl_nice_number(int(int_part), ln, thousands_separator), frac_part) else: chars_in = list(str(number)) number = len(chars_in) chars_out = [] for i in range(0, number): if i % 3 == 0 and i != 0: chars_out.append(thousands_separator) chars_out.append(chars_in[number - i - 1]) chars_out.reverse() return ''.join(chars_out) def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG): """ Return nicely printed number NUM in language LN using the locale. See also version tmpl_nice_number(). """ if number is None: return None # Temporarily switch the numeric locale to the requested one, and format the number # In case the system has no locale definition, use the vanilla form ol = locale.getlocale(locale.LC_NUMERIC) try: locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale)) except locale.Error: return str(number) try: number = locale.format('%d', number, True) except TypeError: return str(number) locale.setlocale(locale.LC_NUMERIC, ol) return number def tmpl_record_format_htmlbrief_header(self, ln): """Returns the header of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_footer, tmpl_record_format_htmlbrief_body Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """ <form action="%(siteurl)s/yourbaskets/add" method="post"> <table> """ % { 'siteurl' : CFG_SITE_URL, } return out def tmpl_record_format_htmlbrief_footer(self, ln, display_add_to_basket=True): """Returns the footer of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. 
See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display - 'display_add_to_basket' *bool* - whether to display Add-to-basket button """ # load the right message language _ = gettext_set_language(ln) out = """</table> <br /> <input type="hidden" name="colid" value="0" /> %(add_to_basket)s </form>""" % { 'add_to_basket': display_add_to_basket and """<input class="formbutton" type="submit" name="action" value="%s" />""" % _("Add to basket") or "", } return out def tmpl_record_format_htmlbrief_body(self, ln, recid, row_number, relevance, record, relevances_prologue, relevances_epilogue, display_add_to_basket=True): """Returns the html brief format of one record. Used in the search results list for each record. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_footer(..) Parameters: - 'ln' *string* - The language to display - 'row_number' *int* - The position of this record in the list - 'recid' *int* - The recID - 'relevance' *string* - The relevance of the record - 'record' *string* - The formatted record - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting) """ # load the right message language _ = gettext_set_language(ln) checkbox_for_baskets = """<input name="recid" type="checkbox" value="%(recid)s" />""" % \ {'recid': recid, } if not display_add_to_basket: checkbox_for_baskets = '' out = """ <tr><td valign="top" align="right" style="white-space: nowrap;"> %(checkbox_for_baskets)s <abbr class="unapi-id" title="%(recid)s"></abbr> %(number)s. 
""" % {'recid': recid, 'number': row_number, 'checkbox_for_baskets': checkbox_for_baskets} if relevance: out += """<br /><div class="rankscoreinfo"><a title="rank score">%(prologue)s%(relevance)s%(epilogue)s</a></div>""" % { 'prologue' : relevances_prologue, 'epilogue' : relevances_epilogue, 'relevance' : relevance } out += """</td><td valign="top">%s</td></tr>""" % record return out def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec, hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below. Parameters: - 'ln' *string* - The language to display - 'results_final_nb_total' *int* - The total number of hits for the query - 'colls' *array* - The collections with hits, in the format: - 'coll[code]' *string* - The code of the collection (canonical name) - 'coll[name]' *string* - The display name of the collection - 'results_final_nb' *array* - The number of hits, indexed by the collection codes: - 'cpu_time' *string* - The time the query took - 'url_args' *string* - The rest of the search query - 'ec' *array* - selected external collections - 'hosted_colls_potential_results_p' *boolean* - check if there are any hosted collections searches that timed out during the pre-search """ if len(colls) == 1 and not ec: # if one collection only and no external collections, print nothing: return "" # load the right message language _ = gettext_set_language(ln) # first find total number of hits: # if there were no hosted collections that timed out during the pre-search print out the exact number of records found if not hosted_colls_potential_results_p: out = """<table class="searchresultsbox"> <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead> <tbody><tr><td class="searchresultsboxbody"> """ % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \ {'x_fmt_open': '<strong>', 
'x_fmt_close': '</strong>', 'x_nb_records': '<strong>' + self.tmpl_nice_number(results_final_nb_total, ln) + '</strong>', 'x_nb_seconds': '%.2f' % cpu_time} } # if there were (only) hosted_collections that timed out during the pre-search print out a fuzzier message else: if results_final_nb_total == 0: out = """<table class="searchresultsbox"> <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead> <tbody><tr><td class="searchresultsboxbody"> """ % { 'founds' : _("%(x_fmt_open)sResults overview%(x_fmt_close)s") % \ {'x_fmt_open': '<strong>', 'x_fmt_close': '</strong>'} } elif results_final_nb_total > 0: out = """<table class="searchresultsbox"> <thead><tr><th class="searchresultsboxheader">%(founds)s</th></tr></thead> <tbody><tr><td class="searchresultsboxbody"> """ % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found at least %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \ {'x_fmt_open': '<strong>', 'x_fmt_close': '</strong>', 'x_nb_records': '<strong>' + self.tmpl_nice_number(results_final_nb_total, ln) + '</strong>', 'x_nb_seconds': '%.2f' % cpu_time} } # then print hits per collection: for coll in colls: if results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] > 0: out += """ <strong><a href="#%(coll)s">%(coll_name)s</a></strong>, <a href="#%(coll)s">%(number)s</a><br />""" % \ {'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % \ ('<strong>' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '</strong>')} # the following is used for hosted collections that have timed out, # i.e. for which we don't know the exact number of results yet. 
elif results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] == -963: out += """ <strong><a href="#%(coll)s">%(coll_name)s</a></strong><br />""" % \ {'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % \ ('<strong>' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '</strong>')} out += "</td></tr></tbody></table>" return out def tmpl_print_hosted_results(self, url_and_engine, ln, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Print results of a given search engine. """ _ = gettext_set_language(ln) #url = url_and_engine[0] engine = url_and_engine[1] #name = _(engine.name) db_id = get_collection_id(engine.name) #base_url = engine.base_url out = "" results = engine.parser.parse_and_get_results(None, of=of, req=req, limit=limit, parseonly=True) if len(results) != 0: if of == 'hb': out += """ <form action="%(siteurl)s/yourbaskets/add" method="post"> <input type="hidden" name="colid" value="%(col_db_id)s" /> <table> """ % { 'siteurl' : CFG_SITE_URL, 'col_db_id' : db_id, } else: if of == 'hb': out += """ <table> """ for result in results: out += result.html.replace('>Detailed record<', '>External record<').replace('>Similar records<', '>Similar external records<') if len(results) != 0: if of == 'hb': out += """</table> <br /><input class="formbutton" type="submit" name="action" value="%(basket)s" /> </form>""" % { 'basket' : _("Add to basket") } else: if of == 'hb': out += """ </table> """ # we have already checked if there are results or no, maybe the following if should be removed? 
if not results: if of.startswith("h"): out = _('No results found...') + '<br />' return out def tmpl_print_searchresultbox(self, header, body): """print a nicely formatted box for search results """ #_ = gettext_set_language(ln) # first find total number of hits: out = '<table class="searchresultsbox"><thead><tr><th class="searchresultsboxheader">' + header + '</th></tr></thead><tbody><tr><td class="searchresultsboxbody">' + body + '</td></tr></tbody></table>' return out def tmpl_search_no_boolean_hits(self, ln, nearestterms): """No hits found, proposes alternative boolean queries Parameters: - 'ln' *string* - The language to display - 'nearestterms' *array* - Parts of the interface to display, in the format: - 'nearestterms[nbhits]' *int* - The resulting number of hits - 'nearestterms[url_args]' *string* - The search parameters - 'nearestterms[p]' *string* - The search terms """ # load the right message language _ = gettext_set_language(ln) out = _("Boolean query returned no hits. Please combine your search terms differently.") out += '''<blockquote><table class="nearesttermsbox" cellpadding="0" cellspacing="0" border="0">''' for term, hits, argd in nearestterms: out += '''\ <tr> <td class="nearesttermsboxbody" align="right">%(hits)s</td> <td class="nearesttermsboxbody" width="15"> </td> <td class="nearesttermsboxbody" align="left"> %(link)s </td> </tr>''' % {'hits' : hits, 'link': create_html_link(self.build_search_url(argd), {}, cgi.escape(term), {'class': "nearestterms"})} out += """</table></blockquote>""" return out def tmpl_similar_author_names(self, authors, ln): """No hits found, proposes alternative boolean queries Parameters: - 'authors': a list of (name, hits) tuples - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = '''<a name="googlebox"></a> <table class="googlebox"><tr><th colspan="2" class="googleboxheader">%(similar)s</th></tr>''' % { 'similar' : _("See also: similar author names") } 
for author, hits in authors: out += '''\ <tr> <td class="googleboxbody">%(nb)d</td> <td class="googleboxbody">%(link)s</td> </tr>''' % {'link': create_html_link( self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author), {'class':"google"}), 'nb' : hits} out += """</table>""" return out def tmpl_print_record_detailed(self, recID, ln): """Displays a detailed on-the-fly record Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ # okay, need to construct a simple "Detailed record" format of our own: out = "<p> " # secondly, title: titles = get_fieldvalues(recID, "245__a") for title in titles: out += "<p><center><big><strong>%s</strong></big></center></p>" % cgi.escape(title) # thirdly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += "<p><center>" for author in authors: out += '%s; ' % create_html_link(self.build_search_url( ln=ln, p=author, f='author'), {}, cgi.escape(author)) out += "</center></p>" # fourthly, date of creation: dates = get_fieldvalues(recID, "260__c") for date in dates: out += "<p><center><small>%s</small></center></p>" % date # fifthly, abstract: abstracts = get_fieldvalues(recID, "520__a") for abstract in abstracts: out += """<p style="margin-left: 15%%; width: 70%%"> <small><strong>Abstract:</strong> %s</small></p>""" % abstract # fifthly bis, keywords: keywords = get_fieldvalues(recID, "6531_a") if len(keywords): out += """<p style="margin-left: 15%%; width: 70%%"> <small><strong>Keyword(s):</strong>""" for keyword in keywords: out += '%s; ' % create_html_link( self.build_search_url(ln=ln, p=keyword, f='keyword'), {}, cgi.escape(keyword)) out += '</small></p>' # fifthly bis bis, published in: prs_p = get_fieldvalues(recID, "909C4p") prs_v = get_fieldvalues(recID, "909C4v") prs_y = get_fieldvalues(recID, "909C4y") prs_n = get_fieldvalues(recID, "909C4n") prs_c = get_fieldvalues(recID, "909C4c") for idx in range(0, len(prs_p)): out += 
"""<p style="margin-left: 15%%; width: 70%%"> <small><strong>Publ. in:</strong> %s""" % prs_p[idx] if prs_v and prs_v[idx]: out += """<strong>%s</strong>""" % prs_v[idx] if prs_y and prs_y[idx]: out += """(%s)""" % prs_y[idx] if prs_n and prs_n[idx]: out += """, no.%s""" % prs_n[idx] if prs_c and prs_c[idx]: out += """, p.%s""" % prs_c[idx] out += """.</small></p>""" # sixthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") # we separate the fulltext links and image links for url_u in urls_u: if url_u.endswith('.png'): continue else: link_text = "URL" try: if urls_z[idx]: link_text = urls_z[idx] except IndexError: pass out += """<p style="margin-left: 15%%; width: 70%%"> <small><strong>%s:</strong> <a href="%s">%s</a></small></p>""" % (link_text, urls_u[idx], urls_u[idx]) # print some white space at the end: out += "<br /><br />" return out def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG): """Print list of records in the "hs" (HTML Similarity) format for similarity boxes. RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc. 
""" from invenio.search_engine import print_record, record_public_p recID_score_list_to_be_printed = [] # firstly find 5 first public records to print: nb_records_to_be_printed = 0 nb_records_seen = 0 while nb_records_to_be_printed < 5 and nb_records_seen < len(recID_score_list) and nb_records_seen < 50: # looking through first 50 records only, picking first 5 public ones (recID, score) = recID_score_list[nb_records_seen] nb_records_seen += 1 if record_public_p(recID): nb_records_to_be_printed += 1 recID_score_list_to_be_printed.append([recID, score]) # secondly print them: out = ''' <table><tr> <td> <table><tr><td class="blocknote">%(title)s</td></tr></table> </td> </tr> <tr> <td><table> ''' % { 'title': cgi.escape(title) } for recid, score in recID_score_list_to_be_printed: out += ''' <tr><td><font class="rankscoreinfo"><a>(%(score)s) </a></font><small> %(info)s</small></td></tr>''' % { 'score': score, 'info' : print_record(recid, format="hs", ln=ln), } out += """</table></td></tr></table> """ return out def tmpl_print_record_brief(self, ln, recID): """Displays a brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ out = "" # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_fieldvalues(recID, "245__a") # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") # thirdly, date of creation: dates = get_fieldvalues(recID, "260__c") # thirdly bis, report numbers: rns = get_fieldvalues(recID, "037__a") rns = get_fieldvalues(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_fieldvalues(recID, "520__a") # fifthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") # get rid of images images = [] non_image_urls_u = [] for url_u in urls_u: if url_u.endswith('.png'): images.append(url_u) else: non_image_urls_u.append(url_u) ## unAPI identifier out 
= '<abbr class="unapi-id" title="%s"></abbr>\n' % recID out += self.tmpl_record_body( titles=titles, authors=authors, dates=dates, rns=rns, abstracts=abstracts, urls_u=non_image_urls_u, urls_z=urls_z, ln=ln) return out def tmpl_print_record_brief_links(self, ln, recID, sf='', so='d', sp='', rm='', display_claim_link=False): """Displays links for brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments # load the right message language _ = gettext_set_language(ln) out = '<div class="moreinfo">' if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: alephsysnos = get_fieldvalues(recID, "970__a") if len(alephsysnos) > 0: alephsysno = alephsysnos[0] out += '<span class="moreinfo">%s</span>' % \ create_html_link(self.build_search_url(recid=alephsysno, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '<span class="moreinfo">%s</span>' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '<span class="moreinfo">%s</span>' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) out += '<span class="moreinfo"> - %s</span>' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="wrd", ln=ln), {}, _("Similar records"), {'class': "moreinfo"}) if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recID) if num_timescited: out += '<span class="moreinfo"> - %s</span>' % \ create_html_link(self.build_search_url(p="refersto:recid:%d" % recID, sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, num_timescited > 1 and _("Cited by %i records") % num_timescited or _("Cited by 1 record"), {'class': "moreinfo"}) else: out += "<!--not showing citations links-->" if display_claim_link: #Maybe we want not to show the link to who cannot use id? 
out += '<span class="moreinfo"> - %s</span>' % \ create_html_link(CFG_SITE_URL + '/person/batchprocess', {'mfind_bibref':'claim', 'selected_bibrecs':str(recID), }, 'Claim this paper', {'class': "moreinfo"}) if CFG_WEBCOMMENT_ALLOW_COMMENTS and CFG_WEBSEARCH_SHOW_COMMENT_COUNT: num_comments = get_nb_comments(recID) if num_comments: out += '<span class="moreinfo"> - %s</span>' % \ create_html_link(CFG_SITE_URL + '/record/' + str(recID) + '/comments?ln=%s' % ln, {}, num_comments > 1 and _("%i comments") % (num_comments) or _("1 comment"), {'class': "moreinfo"}) else: out += "<!--not showing reviews links-->" if CFG_WEBCOMMENT_ALLOW_REVIEWS and CFG_WEBSEARCH_SHOW_REVIEW_COUNT: num_reviews = get_nb_reviews(recID) if num_reviews: out += '<span class="moreinfo"> - %s</span>' % \ create_html_link(CFG_SITE_URL + '/record/' + str(recID) + '/reviews?ln=%s' % ln, {}, num_reviews > 1 and _("%i reviews") % (num_reviews) or _("1 review"), {'class': "moreinfo"}) else: out += "<!--not showing reviews links-->" out += '</div>' return out def tmpl_xml_rss_prologue(self, current_url=None, previous_url=None, next_url=None, first_url=None, last_url=None, nb_found=None, jrec=None, rg=None): """Creates XML RSS 2.0 prologue.""" out = """<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"> <channel> <title>%(sitename)s</title> <link>%(siteurl)s</link> <description>%(sitename)s latest documents</description> <language>%(sitelang)s</language> <pubDate>%(timestamp)s</pubDate> <category></category> <generator>Invenio %(version)s</generator> <webMaster>%(sitesupportemail)s</webMaster> <ttl>%(timetolive)s</ttl>%(previous_link)s%(next_link)s%(current_link)s%(total_results)s%(start_index)s%(items_per_page)s <image> <url>%(siteurl)s/img/site_logo_rss.png</url> <title>%(sitename)s</title> 
<link>%(siteurl)s</link> </image> <atom:link rel="search" href="%(siteurl)s/opensearchdescription" type="application/opensearchdescription+xml" title="Content Search" /> <textInput> <title>Search </title> <description>Search this site:</description> <name>p</name> <link>%(siteurl)s/search</link> </textInput> """ % {'sitename': CFG_SITE_NAME, 'siteurl': CFG_SITE_URL, 'sitelang': CFG_SITE_LANG, 'search_syntax': self.tmpl_opensearch_rss_url_syntax, 'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()), 'version': CFG_VERSION, 'sitesupportemail': CFG_SITE_SUPPORT_EMAIL, 'timetolive': CFG_WEBSEARCH_RSS_TTL, 'current_link': (current_url and \ '\n<atom:link rel="self" href="%s" />\n' % current_url) or '', 'previous_link': (previous_url and \ '\n<atom:link rel="previous" href="%s" />' % previous_url) or '', 'next_link': (next_url and \ '\n<atom:link rel="next" href="%s" />' % next_url) or '', 'first_link': (first_url and \ '\n<atom:link rel="first" href="%s" />' % first_url) or '', 'last_link': (last_url and \ '\n<atom:link rel="last" href="%s" />' % last_url) or '', 'total_results': (nb_found and \ '\n<opensearch:totalResults>%i</opensearch:totalResults>' % nb_found) or '', 'start_index': (jrec and \ '\n<opensearch:startIndex>%i</opensearch:startIndex>' % jrec) or '', 'items_per_page': (rg and \ '\n<opensearch:itemsPerPage>%i</opensearch:itemsPerPage>' % rg) or '', } return out def tmpl_xml_rss_epilogue(self): """Creates XML RSS 2.0 epilogue.""" out = """\ </channel> </rss>\n""" return out def tmpl_xml_nlm_prologue(self): """Creates XML NLM prologue.""" out = """<articles>\n""" return out def tmpl_xml_nlm_epilogue(self): """Creates XML NLM epilogue.""" out = """\n</articles>""" return out def tmpl_xml_refworks_prologue(self): """Creates XML RefWorks prologue.""" out = """<references>\n""" return out def tmpl_xml_refworks_epilogue(self): """Creates XML RefWorks epilogue.""" out = """\n</references>""" return out def tmpl_xml_endnote_prologue(self): 
"""Creates XML EndNote prologue.""" out = """<records>\n""" return out def tmpl_xml_endnote_epilogue(self): """Creates XML EndNote epilogue.""" out = """\n</records>""" return out def tmpl_xml_marc_prologue(self): """Creates XML MARC prologue.""" out = """<collection xmlns="http://www.loc.gov/MARC21/slim">\n""" return out def tmpl_xml_marc_epilogue(self): """Creates XML MARC epilogue.""" out = """\n</collection>""" return out def tmpl_xml_mods_prologue(self): """Creates XML MODS prologue.""" out = """<modsCollection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n xsi:schemaLocation="http://www.loc.gov/mods/v3\n http://www.loc.gov/standards/mods/v3/mods-3-3.xsd">\n""" return out def tmpl_xml_mods_epilogue(self): """Creates XML MODS epilogue.""" out = """\n</modsCollection>""" return out def tmpl_xml_default_prologue(self): """Creates XML default format prologue. (Sanity calls only.)""" out = """<collection>\n""" return out def tmpl_xml_default_epilogue(self): """Creates XML default format epilogue. (Sanity calls only.)""" out = """\n</collection>""" return out def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG): """ Create page title for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = _("Collection %s Not Found") % cgi.escape(colname) return out def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG): """ Create page body for cases when unexisting collection was asked for. 
""" _ = gettext_set_language(ln) out = """<h1>%(title)s</h1> <p>%(sorry)s</p> <p>%(you_may_want)s</p> """ % { 'title': self.tmpl_collection_not_found_page_title(colname, ln), 'sorry': _("Sorry, collection %s does not seem to exist.") % \ ('<strong>' + cgi.escape(colname) + '</strong>'), 'you_may_want': _("You may want to start browsing from %s.") % \ ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + \ cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '</a>')} return out def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln, display_email_alert_part=True): """Propose teaser for setting up this query as alert or RSS feed. Parameters: - 'id_query' *int* - ID of the query we make teaser for - 'ln' *string* - The language to display - 'display_email_alert_part' *bool* - whether to display email alert part """ # load the right message language _ = gettext_set_language(ln) # get query arguments: res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,)) argd = {} if res: argd = cgi.parse_qs(res[0][0]) rssurl = self.build_rss_url(argd) alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&idq=%s' % (ln, id_query) if display_email_alert_part: msg_alert = _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \ {'x_url1_open': '<a href="%s"><img src="%s/img/mail-icon-12x8.gif" border="0" alt="" /></a> ' % (alerturl, CFG_SITE_URL) + ' <a class="google" href="%s">' % (alerturl), 'x_url1_close': '</a>', 'x_url2_open': '<a href="%s"><img src="%s/img/feed-icon-12x12.gif" border="0" alt="" /></a> ' % (rssurl, CFG_SITE_URL) + ' <a class="google" href="%s">' % rssurl, 'x_url2_close': '</a>', } else: msg_alert = _("""Subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \ {'x_url2_open': '<a href="%s"><img src="%s/img/feed-icon-12x12.gif" border="0" alt="" /></a> ' % (rssurl, CFG_SITE_URL) + ' <a class="google" href="%s">' % rssurl, 'x_url2_close': '</a>', } out = '''<a 
name="googlebox"></a> <table class="googlebox"><tr><th class="googleboxheader">%(similar)s</th></tr> <tr><td class="googleboxbody">%(msg_alert)s</td></tr> </table> ''' % { 'similar' : _("Interested in being notified about new results for this query?"), 'msg_alert': msg_alert, } return out def tmpl_detailed_record_metadata(self, recID, ln, format, content, creationdate=None, modificationdate=None): """Returns the main detailed page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'format' *string* - The format in used to print the record - 'content' *string* - The main content of the page - 'creationdate' *string* - The creation date of the printed record - 'modificationdate' *string* - The last modification date of the printed record """ _ = gettext_set_language(ln) ## unAPI identifier out = '<abbr class="unapi-id" title="%s"></abbr>\n' % recID out += content return out def tmpl_record_plots(self, recID, ln): """ Displays little tables containing the images and captions contained in the specified document. Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ from invenio.search_engine import get_record from invenio.bibrecord import field_get_subfield_values from invenio.bibrecord import record_get_field_instances _ = gettext_set_language(ln) out = '' rec = get_record(recID) flds = record_get_field_instances(rec, '856', '4') images = [] for fld in flds: image = field_get_subfield_values(fld, 'u') caption = field_get_subfield_values(fld, 'y') if type(image) == list and len(image) > 0: image = image[0] else: continue if type(caption) == list and len(caption) > 0: caption = caption[0] else: continue if not image.endswith('.png'): # huh? continue if len(caption) >= 5: images.append((int(caption[:5]), image, caption[5:])) else: # we don't have any idea of the order... 
just put it on images.append(99999, image, caption) images = sorted(images, key=lambda x: x[0]) for (index, image, caption) in images: # let's put everything in nice little subtables with the image # next to the caption out = out + '<table width="95%" style="display: inline;">' + \ '<tr><td width="66%"><a name="' + str(index) + '" ' + \ 'href="' + image + '">' + \ '<img src="' + image + '" width="95%"/></a></td>' + \ '<td width="33%">' + caption + '</td></tr>' + \ '</table>' out = out + '<br /><br />' return out def tmpl_detailed_record_statistics(self, recID, ln, downloadsimilarity, downloadhistory, viewsimilarity): """Returns the statistics page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - downloadsimilarity *string* - downloadsimilarity box - downloadhistory *string* - downloadhistory box - viewsimilarity *string* - viewsimilarity box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("People who downloaded this document also downloaded:"), downloadsimilarity, ln) out = '<table>' out += ''' <tr><td>%(graph)s</td></tr> <tr><td>%(similar)s</td></tr> ''' % { 'siteurl': CFG_SITE_URL, 'recid': recID, 'ln': ln, 'similar': similar, 'more': _("more"), 'graph': downloadsimilarity } out += '</table>' out += '<br />' if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None: out += self.tmpl_print_record_list_for_similarity_boxen ( _("People who viewed this page also viewed:"), viewsimilarity, ln) if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None: out += downloadhistory + '<br />' return out def tmpl_detailed_record_citations_prologue(self, recID, ln): """Returns the prologue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return 
'<table>' def tmpl_detailed_record_citations_epilogue(self, recID, ln): """Returns the epilogue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '</table>' def tmpl_detailed_record_citations_citing_list(self, recID, ln, citinglist, sf='', so='d', sp='', rm=''): """Returns the list of record citing this one Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None: similar = self.tmpl_print_record_list_for_similarity_boxen( _("Cited by: %s records") % len (citinglist), citinglist, ln) out += ''' <tr><td> %(similar)s %(more)s <br /><br /> </td></tr>''' % { 'more': create_html_link( self.build_search_url(p='refersto:recid:%d' % recID, #XXXX sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, _("more")), 'similar': similar} return out def tmpl_detailed_record_citations_citation_history(self, recID, ln, citationhistory): """Returns the citations history graph of this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citationhistory *string* - citationhistory box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None: out = '<!--citation history--><tr><td>%s</td></tr>' % citationhistory else: out = "<!--not showing citation history. 
CFG_BIBRANK_SHOW_CITATION_GRAPHS:" out += str(CFG_BIBRANK_SHOW_CITATION_GRAPHS) + " citationhistory " if citationhistory: out += str(len(citationhistory)) + "-->" else: out += "no citationhistory -->" return out def tmpl_detailed_record_citations_co_citing(self, recID, ln, cociting): """Returns the list of cocited records Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - cociting *string* - cociting box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("Co-cited with: %s records") % len (cociting), cociting, ln) out = ''' <tr><td> %(similar)s %(more)s <br /> </td></tr>''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln), {}, _("more")), 'similar': similar } return out def tmpl_detailed_record_citations_self_cited(self, recID, ln, selfcited, citinglist): """Returns the list of self-citations for this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - selfcited list - a list of self-citations for recID """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None: sc_scorelist = [] #a score list for print.. for s in selfcited: #copy weight from citations weight = 0 for c in citinglist: (crec, score) = c if crec == s: weight = score tmp = [s, weight] sc_scorelist.append(tmp) scite = self.tmpl_print_record_list_for_similarity_boxen ( _(".. 
of which self-citations: %s records") % len (selfcited), sc_scorelist, ln) out = '<tr><td>' + scite + '</td></tr>' return out def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict, - citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, ln): + citedbylist, kwtuples, authors, vtuples, names_dict, admin_link, is_bibauthorid, ln): """Prints stuff about the author given as authorname. 1. Author name + his/her institutes. Each institute I has a link to papers where the auhtor has I as institute. 2. Publications, number: link to search by author. 3. Keywords 4. Author collabs 5. Publication venues like journals The parameters are data structures needed to produce 1-6, as follows: req - request pubs - list of recids, probably the records that have the author as an author authorname - evident num_downloads - evident aff_pubdict - a dictionary where keys are inst names and values lists of recordids citedbylist - list of recs that cite pubs kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids authors - a list of authors that have collaborated with authorname names_dict - a dict of {name: frequency} """ from invenio.search_engine import perform_request_search from operator import itemgetter _ = gettext_set_language(ln) + ib_pubs = intbitset(pubs) # Prepare data for display # construct names box header = "<strong>" + _("Name variants") + "</strong>" content = [] for name, frequency in sorted(names_dict.iteritems(), key=itemgetter(1), reverse=True): name_lnk = create_html_link(self.build_search_url(p=name, f='exactauthor'), {}, str(frequency),) content.append("%s (%s)" % (name, name_lnk)) if not content: - content = [_("No name variants found")] + content = [_("No Name Variants")] names_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content)) + # construct an extended search as an interim solution for author id + # searches. 
Will build "(exactauthor:v1 OR exactauthor:v2)" strings + extended_author_search_str = "" + + if is_bibauthorid: + if len(names_dict.keys()) > 1: + extended_author_search_str = '(' + + for name_index, name_query in enumerate(names_dict.keys()): + if name_index > 0: + extended_author_search_str += " OR " + + extended_author_search_str += 'exactauthor:"' + name_query + '"' + + if len(names_dict.keys()) > 1: + extended_author_search_str += ')' # construct papers box - searchstr = create_html_link(self.build_search_url(p=authorname, - f='exactauthor'), + rec_query = 'exactauthor:"' + authorname + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + + searchstr = create_html_link(self.build_search_url(p=rec_query), {}, "All papers (" + str(len(pubs)) + ")",) - line1 = "<strong>" + _("Records") + "</strong>" + line1 = "<strong>" + _("Papers") + "</strong>" line2 = searchstr + if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads: line2 += " (" + _("downloaded") + " " line2 += str(num_downloads) + " " + _("times") + ")" + if CFG_INSPIRE_SITE: CFG_COLLS = ['Book', 'Conference', 'Introductory', 'Lectures', 'Preprint', 'Published', 'Report', 'Review', 'Thesis'] else: CFG_COLLS = ['Article', 'Book', 'Preprint', ] collsd = {} for coll in CFG_COLLS: - coll_num_papers = len(intbitset(pubs) & intbitset(perform_request_search(p="collection:" + coll))) - if coll_num_papers: - collsd[coll] = coll_num_papers + coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll))) + if coll_papers: + collsd[coll] = coll_papers colls = collsd.keys() - colls.sort(lambda x, y: cmp(collsd[y], collsd[x])) # sort by number of papers + colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers for coll in colls: - line2 += "<br>" + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \ - 'collection:' + coll), - {}, coll + " (" + str(collsd[coll]) + ")",) + rec_query = 
'exactauthor:"' + authorname + '" ' + 'collection:' + coll + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' collection:' + coll + + line2 += "<br />" + create_html_link(self.build_search_url(p=rec_query), + {}, coll + " (" + str(len(collsd[coll])) + ")",) + + if not pubs: + line2 = _("No Papers") papers_box = self.tmpl_print_searchresultbox(line1, line2) #make a authoraff string that looks like CERN (1), Caltech (2) etc authoraff = "" aff_pubdict_keys = aff_pubdict.keys() aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x]))) - for a in aff_pubdict_keys: - recids = "+or+".join(map(str, aff_pubdict[a])) - print_a = a - if (print_a == ' '): - print_a = _("unknown") - if authoraff: - authoraff += '<br>' - authoraff += "<a href=\"../search?f=recid&p=" + recids + "\">" + print_a + ' (' + str(len(aff_pubdict[a])) + ")</a>" + + if aff_pubdict_keys: + for a in aff_pubdict_keys: + print_a = a + if (print_a == ' '): + print_a = _("unknown affiliation") + if authoraff: + authoraff += '<br>' + authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]), + f='recid'), + {}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',) + else: + authoraff = _("No Affiliations") line1 = "<strong>" + _("Affiliations") + "</strong>" line2 = authoraff affiliations_box = self.tmpl_print_searchresultbox(line1, line2) # print frequent keywords: keywstr = "" if (kwtuples): for (kw, freq) in kwtuples: if keywstr: keywstr += '<br>' - #create a link in author=x, keyword=y - searchstr = create_html_link(self.build_search_url( - p='exactauthor:"' + authorname + '" ' + - 'keyword:"' + kw + '"'), - {}, kw + " (" + str(freq) + ")",) + rec_query = 'exactauthor:"' + authorname + '" ' + 'keyword:"' + kw + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' keyword:"' + kw + '"' + + searchstr = 
create_html_link(self.build_search_url(p=rec_query), + {}, kw + " (" + str(freq) + ")",) keywstr = keywstr + " " + searchstr else: - keywstr += 'No Keywords found' + keywstr += _('No Keywords') line1 = "<strong>" + _("Frequent keywords") + "</strong>" line2 = keywstr keyword_box = self.tmpl_print_searchresultbox(line1, line2) header = "<strong>" + _("Frequent co-authors") + "</strong>" content = [] + sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)), key=itemgetter(1), reverse=True) - for name, frequency in sorted(authors.iteritems(), - key=itemgetter(1), - reverse=True): - lnk = create_html_link(self.build_search_url(p=name, - f='exactauthor'), {}, "%s (%s)" % (name, frequency),) + for name, frequency in sorted_coauthors: + rec_query = 'exactauthor:"' + authorname + '" ' + 'exactauthor:"' + name + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + ' exactauthor:"' + name + '"' + + lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),) content.append("%s" % lnk) if not content: - content = [_("No frequent co-authors")] + content = [_("No Frequent Co-authors")] coauthor_box = self.tmpl_print_searchresultbox(header, "<br />\n".join(content)) req.write("<h1>%s</h1>" % authorname) if admin_link: req.write('<div><a href="%s/person/%s">%s</div>' % (CFG_SITE_URL, admin_link, _("Start Person/Author Manager for this entity"))) req.write("<table width=80%><tr valign=top><td>") req.write(names_box) req.write("<br />") req.write(papers_box) req.write("<br />") req.write(keyword_box) req.write("</td>") req.write("<td> </td>") req.write("<td>") req.write(affiliations_box) req.write("<br />") req.write(coauthor_box) req.write("</td></tr></table>") # print citations: + rec_query = 'exactauthor:"' + authorname + '"' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + if len(citedbylist): line1 = "<strong>" + _("Citations:") + 
"</strong>" line2 = "" + + if not pubs: + line2 = _("No Citation Information available") + req.write(self.tmpl_print_searchresultbox(line1, line2)) # print frequent co-authors: # collabstr = "" # if (authors): # for c in authors: # c = c.strip() # if collabstr: # collabstr += '<br>' # #do not add this person him/herself in the list # cUP = c.upper() # authornameUP = authorname.upper() # if not cUP == authornameUP: # commpubs = intbitset(pubs) & intbitset(perform_request_search(p="exactauthor:\"%s\" exactauthor:\"%s\"" % (authorname, c))) # collabstr = collabstr + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" exactauthor:"' + c + '"'), # {}, c + " (" + str(len(commpubs)) + ")",) # else: collabstr += 'None' # banner = self.tmpl_print_searchresultbox("<strong>" + _("Frequent co-authors:") + "</strong>", collabstr) # print frequently publishes in journals: #if (vtuples): # pubinfo = "" # for t in vtuples: # (journal, num) = t # pubinfo += create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \ # 'journal:"' + journal + '"'), # {}, journal + " ("+str(num)+")<br/>") # banner = self.tmpl_print_searchresultbox("<strong>" + _("Frequently publishes in:") + "<strong>", pubinfo) # req.write(banner) def tmpl_detailed_record_references(self, recID, ln, content): """Returns the discussion page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'content' *string* - The main content of the page """ # load the right message language _ = gettext_set_language(ln) out = '' if content is not None: out += content return out def tmpl_citesummary_prologue(self, d_total_recs, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, prologue. 
A part of HCS format suite.""" _ = gettext_set_language(ln) out = """<p><table id="citesummary"> <tr><td><strong class="headline">%(msg_title)s</strong></td>""" % \ {'msg_title': _("Citation summary results"), } for coll, colldef in l_colls: out += '<td align="right">%s</td>' % coll out += '</tr>' out += """<tr><td><strong>%(msg_recs)s</strong></td>""" % \ {'msg_recs': _("Total number of citable papers analyzed:"), } for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) if colldef: link_url += '%20AND%20' + quote(colldef) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_total_recs[coll], ln) out += '<td align="right"><a href="%s">%s</a></td>' % (link_url, link_text) out += '</tr>' return out def tmpl_citesummary_overview(self, d_total_cites, d_avg_cites, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, overview. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """<tr><td><strong>%(msg_cites)s</strong></td>""" % \ {'msg_cites': _("Total number of citations:"), } for coll, colldef in l_colls: out += '<td align="right">%s</td>' % self.tmpl_nice_number(d_total_cites[coll], ln) out += '</tr>' out += """<tr><td><strong>%(msg_avgcit)s</strong></td>""" % \ {'msg_avgcit': _("Average citations per paper:"), } for coll, colldef in l_colls: out += '<td align="right">%.1f</td>' % d_avg_cites[coll] out += '</tr>' out += """<tr><td><strong>%(msg_breakdown)s</strong></td></tr>""" % \ {'msg_breakdown': _("Breakdown of papers by citations:"), } return out def tmpl_citesummary_breakdown_by_fame(self, d_cites, low, high, fame, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, breakdown by fame. 
A part of HCS format suite.""" _ = gettext_set_language(ln) out = """<tr><td>%(fame)s</td>""" % \ {'fame': fame, } for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) + '%20AND%20' if colldef: link_url += quote(colldef) + '%20AND%20' if low == 0 and high == 0: link_url += quote('cited:0') else: link_url += quote('cited:%i->%i' % (low, high)) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_cites[coll], ln) out += '<td align="right"><a href="%s">%s</a></td>' % (link_url, link_text) out += '</tr>' return out def tmpl_citesummary_h_index(self, d_h_factors, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, h factor output. A part of the HCS suite.""" _ = gettext_set_language(ln) out = "<tr><td></td></tr><tr><td><strong>%(msg_additional)s</strong> <small><small>[<a href=\"%(help_url)s\">?</a>]</small></small></td></tr>" % \ {'msg_additional': _("Additional Citation Metrics"), 'help_url': CFG_SITE_URL + '/help/citation-metrics', } out += '<tr><td>h-index <small><small>[<a href="' # use ? help linking in the style of oai_repository_admin.py out += '%s">' % (CFG_SITE_URL + '/help/citation-metrics#citesummary_h-index') out += '?</a>]</small></small></td>' for coll, colldef in l_colls: out += '<td align="right">%s</td>' % self.tmpl_nice_number(d_h_factors[coll], ln) out += '</tr>' return out def tmpl_citesummary_epilogue(self, ln=CFG_SITE_LANG): """HTML citesummary format, epilogue. 
A part of HCS format suite.""" _ = gettext_set_language(ln) out = """</table>""" return out def tmpl_unapi(self, formats, identifier=None): """ Provide a list of object format available from the unAPI service for the object identified by IDENTIFIER """ out = '<?xml version="1.0" encoding="UTF-8" ?>\n' if identifier: out += '<formats id="%i">\n' % (identifier) else: out += "<formats>\n" for format_name, format_type in formats.iteritems(): docs = '' if format_name == 'xn': docs = 'http://www.nlm.nih.gov/databases/dtd/' format_type = 'application/xml' format_name = 'nlm' elif format_name == 'xm': docs = 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd' format_type = 'application/xml' format_name = 'marcxml' elif format_name == 'xr': format_type = 'application/rss+xml' docs = 'http://www.rssboard.org/rss-2-0/' elif format_name == 'xw': format_type = 'application/xml' docs = 'http://www.refworks.com/RefWorks/help/RefWorks_Tagged_Format.htm' elif format_name == 'xoaidc': format_type = 'application/xml' docs = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd' elif format_name == 'xe': format_type = 'application/xml' docs = 'http://www.endnote.com/support/' format_name = 'endnote' elif format_name == 'xd': format_type = 'application/xml' docs = 'http://dublincore.org/schemas/' format_name = 'dc' elif format_name == 'xo': format_type = 'application/xml' docs = 'http://www.loc.gov/standards/mods/v3/mods-3-3.xsd' format_name = 'mods' if docs: out += '<format name="%s" type="%s" docs="%s" />\n' % (xml_escape(format_name), xml_escape(format_type), xml_escape(docs)) else: out += '<format name="%s" type="%s" />\n' % (xml_escape(format_name), xml_escape(format_type)) out += "</formats>" return out diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py index 6b213b85a..f995244c5 100644 --- a/modules/websearch/lib/websearch_webinterface.py +++ b/modules/websearch/lib/websearch_webinterface.py @@ -1,1390 +1,1446 @@ ## 
This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSearch URL handler.""" __revision__ = "$Id$" import cgi import os import datetime import time import sys from urllib import quote from invenio import webinterface_handler_config as apache #maximum number of collaborating authors etc shown in GUI MAX_COLLAB_LIST = 10 MAX_KEYWORD_LIST = 10 MAX_VENUE_LIST = 10 #tag constants AUTHOR_TAG = "100__a" AUTHOR_INST_TAG = "100__u" COAUTHOR_TAG = "700__a" COAUTHOR_INST_TAG = "700__u" VENUE_TAG = "909C4p" KEYWORD_TAG = "695__a" FKEYWORD_TAG = "6531_a" CFG_INSPIRE_UNWANTED_KEYWORDS_START = ['talk', 'conference', 'conference proceedings', 'numerical calculations', 'experimental results', 'review', 'bibliography', 'upper limit', 'lower limit', 'tables', 'search for', 'on-shell', 'off-shell', 'formula', 'lectures', 'book', 'thesis'] CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE = ['GeV', '(('] if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_NAME, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \ 
CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBDIR, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_RSS_I18N_COLLECTIONS, \ CFG_INSPIRE_SITE from invenio.dbquery import Error from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \ collect_user_info, logoutUser, isUserSuperAdmin from invenio.websubmit_webinterface import WebInterfaceFilesPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.bibcirculation_webinterface import WebInterfaceHoldingsPages from invenio.webpage import page, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine import check_user_can_view_record, \ collection_reclist_cache, \ collection_restricted_p, \ create_similarly_named_authors_link_box, \ get_colID, \ get_coll_i18nname, \ get_fieldvalues, \ get_fieldvalues_alephseq_like, \ get_most_popular_field_values, \ get_mysql_recid_from_aleph_sysno, \ guess_primary_collection_of_a_record, \ page_end, \ page_start, \ perform_request_cache, \ perform_request_log, \ perform_request_search, \ restricted_collection_cache from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.bibformat import format_records from invenio.bibformat_engine import get_output_formats from invenio.websearch_webcoll import mymkdir, get_collection from invenio.intbitset import intbitset from invenio.bibupload import find_record_from_sysno from invenio.bibrank_citation_searcher import get_cited_by_list from invenio.bibrank_downloads_indexer import 
get_download_weight_total from invenio.search_engine_summarizer import summarize_records from invenio.errorlib import register_exception from invenio.bibedit_webinterface import WebInterfaceEditPages from invenio.bibeditmulti_webinterface import WebInterfaceMultiEditPages from invenio.bibmerge_webinterface import WebInterfaceMergePages import invenio.template websearch_templates = invenio.template.load('websearch') search_results_default_urlargd = websearch_templates.search_results_default_urlargd search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd try: output_formats = [output_format['attrs']['code'].lower() for output_format in \ get_output_formats(with_attributes=True).values()] except KeyError: output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx'] output_formats.extend(['hm', 't', 'h']) def wash_search_urlargd(form): """ Create canonical search arguments from those passed via web form. """ argd = wash_urlargd(form, search_results_default_urlargd) if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # Sometimes, users pass ot=245,700 instead of # ot=245&ot=700. Normalize that. ots = [] for ot in argd['ot']: ots += ot.split(',') argd['ot'] = ots # We can either get the mode of function as # action=<browse|search>, or by setting action_browse or # action_search. 
if argd['action_browse']: argd['action'] = 'browse' elif argd['action_search']: argd['action'] = 'search' else: if argd['action'] not in ('browse', 'search'): argd['action'] = 'search' del argd['action_browse'] del argd['action_search'] return argd class WebInterfaceUnAPIPages(WebInterfaceDirectory): """ Handle /unapi set of pages.""" _exports = [''] def __call__(self, req, form): argd = wash_urlargd(form, { 'id' : (int, 0), 'format' : (str, '')}) formats_dict = get_output_formats(True) formats = {} for format in formats_dict.values(): if format['attrs']['visibility']: formats[format['attrs']['code'].lower()] = format['attrs']['content_type'] del formats_dict if argd['id'] and argd['format']: ## Translate back common format names format = { 'nlm' : 'xn', 'marcxml' : 'xm', 'dc' : 'xd', 'endnote' : 'xe', 'mods' : 'xo' }.get(argd['format'], argd['format']) if format in formats: redirect_to_url(req, '%s/record/%s/export/%s' % (CFG_SITE_URL, argd['id'], format)) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_ACCEPTABLE elif argd['id']: return websearch_templates.tmpl_unapi(formats, identifier=argd['id']) else: return websearch_templates.tmpl_unapi(formats) index = __call__ class WebInterfaceAuthorPages(WebInterfaceDirectory): """ Handle /author/Doe%2C+John page requests as well as /author/<bibrec_id>:<authorname_string> (e.g. /author/15:Doe%2C+John) requests. The latter will try to find a person from the personid universe and will display the joint information from that particular author cluster. This interface will handle the following URLs: - /author/Doe%2C+John which will show information on the exactauthor search - /author/<bibrec_id>:<authorname_string> (e.g. /author/15:Doe%2C+John) will try to find a person from the personid universe and will display the joint information from that particular author cluster. - /author/<personid> (e.g. /author/152) will display the joint information from that particular author cluster (an entity called person). 
""" _exports = ['author'] def __init__(self, pageparam=''): """Constructor.""" - self.pageparam = pageparam.replace("+", " ") + self.pageparam = cgi.escape(pageparam.replace("+", " ")) self.personid = -1 self.authorname = " " def _lookup(self, component, path): """This handler parses dynamic URLs (/author/John+Doe).""" return WebInterfaceAuthorPages(component), path def __call__(self, req, form): """Serve the page in the given language.""" - is_bibauthorid = True + is_bibauthorid = False try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_names_from_id + from invenio.bibauthorid_webapi import get_person_db_names_from_id from invenio.bibauthorid_utils import create_normalized_name from invenio.bibauthorid_utils import split_name_parts from invenio.bibauthorid_config import CLAIMPAPER_CLAIM_OTHERS_PAPERS from invenio.access_control_admin import acc_find_user_role_actions + is_bibauthorid = True except (ImportError): is_bibauthorid = False from operator import itemgetter - argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0) }) + argd = wash_urlargd(form, + {'ln': (str, CFG_SITE_LANG), + 'verbose': (int, 0), + 'recid': (int, -1) + }) ln = argd['ln'] verbose = argd['verbose'] req.argd = argd #needed since perform_req_search + param_recid = argd['recid'] # start page req.content_type = "text/html" req.send_http_header() uid = getUid(req) page_start(req, "hb", "", "", ln, uid) pubs = [] authors = [] recid = None nquery = "" names_dict = {} + db_names_dict = {} + _ = gettext_set_language(ln) #let's see what takes time.. time1 = time.time() genstart = time1 time2 = time.time() #check if it is a person id: try: self.personid = int(self.pageparam) except (ValueError, TypeError): self.personid = -1 - # Well, it's not a person id, maybe a bibrec:name or name:bibrec pair? 
- if self.personid < 1 and is_bibauthorid: - if self.pageparam.count(":"): + if self.personid < 0 and is_bibauthorid: + if param_recid > -1: + # Well, it's not a person id, did we get a record ID? + recid = param_recid + nquery = self.pageparam + elif self.pageparam.count(":"): + # No recid passed, maybe name is recid:name or name:recid pair? left, right = self.pageparam.split(":") try: recid = int(left) nquery = str(right) except (ValueError, TypeError): try: recid = int(right) nquery = str(left) except (ValueError, TypeError): recid = None nquery = self.pageparam else: + # No recid could be determined. Work with name only nquery = self.pageparam sorted_results = search_person_ids_by_name(nquery) for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) if (recid and not (str(recid) in [row[0] for row in authorpapers])): continue authors.append([results[0], results[1], authorpapers[0:4]]) search_results = authors - if search_results: + if len(search_results) == 1: self.personid = search_results[0][0] + #@todo: Show selection of possible Person entities if len > 1 - if self.personid < 1 or not is_bibauthorid: + if self.personid < 0 or not is_bibauthorid: # Well, no person. Fall back to the exact author name search then. self.authorname = self.pageparam if not self.authorname: return websearch_templates.tmpl_author_information(req, {}, self.authorname, - 0, {}, {}, - {}, {}, {}, {}, ln) + 0, {}, {}, {}, + {}, {}, {}, {}, False, ln) #search the publications by this author pubs = perform_request_search(req=req, p=self.authorname, f="exactauthor") names_dict[self.authorname] = len(pubs) + db_names_dict[self.authorname] = len(pubs) - elif is_bibauthorid: + elif is_bibauthorid and self.personid > -1: #yay! Person found! 
find only papers not disapproved by humans + req.write("<!-- Authorpages are Bibauthorid-powered !-->") full_pubs = get_papers_by_person_id(self.personid, -1) pubs = [int(row[0]) for row in full_pubs] longest_name = "" try: self.personid = int(self.personid) except (TypeError, ValueError): raise ValueError("Personid must be a number!") for aname, acount in get_person_names_from_id(self.personid): names_dict[aname] = acount norm_name = create_normalized_name(split_name_parts(aname)) if len(norm_name) > len(longest_name): longest_name = norm_name + for aname, acount in get_person_db_names_from_id(self.personid): + aname = aname.replace('"','').strip() + db_names_dict[aname] = acount + self.authorname = longest_name + if not pubs and param_recid > -1: + req.write("<p>") + req.write(_("We're sorry. The requested author \"%s\" seems not to be listed on the specified paper." + % (self.pageparam,))) + req.write("<br />") + req.write(_("Please try the following link to start a broader search on the author: ")) + req.write('<a href="%s/author/%s">%s</a>' + % (CFG_SITE_URL, self.pageparam, self.pageparam)) + req.write("</p>") + + return page_end(req, 'hb', ln) + #get most frequent authors of these pubs popular_author_tuples = get_most_popular_field_values(pubs, (AUTHOR_TAG, COAUTHOR_TAG)) coauthors = {} for (coauthor, frequency) in popular_author_tuples: - if len(authors) < MAX_COLLAB_LIST: - if coauthor not in names_dict: - coauthors[coauthor] = frequency + if coauthor not in db_names_dict: + coauthors[coauthor] = frequency + + if len(coauthors) > MAX_COLLAB_LIST: + break time1 = time.time() if verbose == 9: req.write("<br/>popularized authors: " + str(time1 - time2) + "<br/>") #and publication venues venuetuples = get_most_popular_field_values(pubs, (VENUE_TAG)) time2 = time.time() if verbose == 9: req.write("<br/>venues: " + str(time2 - time1) + "<br/>") #and keywords kwtuples = get_most_popular_field_values(pubs, (KEYWORD_TAG, FKEYWORD_TAG), 
count_repetitive_values=False) if CFG_INSPIRE_SITE: # filter kw tuples against unwanted keywords: kwtuples_filtered = () for (kw, num) in kwtuples: kwlower = kw.lower() kwlower_unwanted = False for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_START: if kwlower.startswith(unwanted_keyword): kwlower_unwanted = True # unwanted keyword found break for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE: if unwanted_keyword in kwlower: kwlower_unwanted = True # unwanted keyword found break if not kwlower_unwanted: kwtuples_filtered += ((kw, num),) kwtuples = kwtuples_filtered time1 = time.time() if verbose == 9: req.write("<br/>keywords: " + str(time1 - time2) + "<br/>") #construct a simple list of tuples that contains keywords that appear #more than once moreover, limit the length of the list #to MAX_KEYWORD_LIST kwtuples = kwtuples[0:MAX_KEYWORD_LIST] vtuples = venuetuples[0:MAX_VENUE_LIST] time2 = time.time() if verbose == 9: req.write("<br/>misc: " + str(time2 - time1) + "<br/>") #a dict. keys: affiliations, values: lists of publications - author_aff_pubs = self.get_institute_pub_dict(pubs, names_dict.keys()) + author_aff_pubs = self.get_institute_pub_dict(pubs, db_names_dict.keys()) time1 = time.time() if verbose == 9: req.write("<br/>affiliations: " + str(time1 - time2) + "<br/>") totaldownloads = 0 if CFG_BIBRANK_SHOW_DOWNLOAD_STATS: #find out how many times these records have been downloaded recsloads = {} recsloads = get_download_weight_total(recsloads, pubs) #sum up for k in recsloads.keys(): totaldownloads = totaldownloads + recsloads[k] #get cited by.. 
citedbylist = get_cited_by_list(pubs) admin_link = None if is_bibauthorid and self.personid >= 0: if [i[1] for i in acc_find_user_role_actions({'uid': uid}) if i[1] == CLAIMPAPER_CLAIM_OTHERS_PAPERS]: admin_link = self.personid time1 = time.time() if verbose == 9: req.write("<br/>citedby: " + str(time1 - time2) + "<br/>") #finally all stuff there, call the template websearch_templates.tmpl_author_information(req, pubs, self.authorname, totaldownloads, author_aff_pubs, citedbylist, kwtuples, coauthors, vtuples, - names_dict, admin_link, ln) + db_names_dict, admin_link, + is_bibauthorid, ln) time1 = time.time() #cited-by summary - out = summarize_records(intbitset(pubs), 'hcs', ln, req=req) + rec_query = 'exactauthor:"' + self.authorname + '"' + + extended_author_search_str = "" + + if is_bibauthorid: + if len(db_names_dict.keys()) > 1: + extended_author_search_str = '(' + + for name_index, name_query in enumerate(db_names_dict.keys()): + if name_index > 0: + extended_author_search_str += " OR " + + extended_author_search_str += 'exactauthor:"' + name_query + '"' + + if len(db_names_dict.keys()) > 1: + extended_author_search_str += ')' + + if is_bibauthorid and extended_author_search_str: + rec_query = extended_author_search_str + + + if pubs: + req.write(summarize_records(intbitset(pubs), 'hcs', ln, rec_query, req=req)) time2 = time.time() if verbose == 9: req.write("<br/>summarizer: " + str(time2 - time1) + "<br/>") - req.write(out) - # simauthbox = create_similarly_named_authors_link_box(self.authorname) # req.write(simauthbox) if verbose == 9: req.write("<br/>all: " + str(time.time() - genstart) + "<br/>") return page_end(req, 'hb', ln) def get_institute_pub_dict(self, recids, names_list): """return a dictionary consisting of institute -> list of publications""" author_aff_pubs = {} #the dictionary to be built for recid in recids: #iterate all so that we get first author's intitute #if this the first author OR #"his" institute if he is an affliate author affus = 
[] #list of insts from the given record mainauthors = get_fieldvalues(recid, AUTHOR_TAG) mainauthor = " " if mainauthors: mainauthor = mainauthors[0] if (mainauthor in names_list): affus = get_fieldvalues(recid, AUTHOR_INST_TAG) else: #search for coauthors.. coauthor_field_lines = [] coauthorfield_content = get_fieldvalues_alephseq_like(recid, \ COAUTHOR_TAG[:3]) if coauthorfield_content: coauthor_field_lines = coauthorfield_content.split("\n") for line in coauthor_field_lines: for name_item in names_list: breakit = False if line.count(name_item) > 0: #get affilitions .. the correct ones are $$+code code = COAUTHOR_INST_TAG[-1] myparts = line.split("$$") for part in myparts: if part and part[0] == code: myaff = part[1:] affus.append(myaff) breakit = True if breakit: break #if this is empty, add a dummy " " value if (affus == []): affus = [" "] for a in affus: #add in author_aff_pubs if (author_aff_pubs.has_key(a)): tmp = author_aff_pubs[a] tmp.append(recid) author_aff_pubs[a] = tmp else: author_aff_pubs[a] = [recid] return author_aff_pubs index = __call__ class WebInterfaceRecordPages(WebInterfaceDirectory): """ Handling of a /record/<recid> URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = 
wash_search_urlargd(form) argd['recid'] = self.recid argd['tab'] = self.tab if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory): """ Handling of a /record-restricted/<recid> URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, 
reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS record_primary_collection = guess_primary_collection_of_a_record(self.recid) if collection_restricted_p(record_primary_collection): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') # Keep all the arguments, they might be reused in the # record page itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceSearchResultsPages(WebInterfaceDirectory): """ Handling of the /search URL and its sub-pages. 
""" _exports = ['', 'authenticate', 'cache', 'log'] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) _ = gettext_set_language(argd['ln']) if req.method == 'POST': raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text=_("You are not authorized to view this area."), navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL == 2: ## Let's update the current collections list with all ## the restricted collections the user has rights to view. try: restricted_collections = user_info['precached_permitted_restricted_collections'] argd_collections = set(argd['c']) argd_collections.update(restricted_collections) argd['c'] = list(argd_collections) except KeyError: pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS involved_collections = set() involved_collections.update(argd['c']) involved_collections.add(argd['cc']) if argd['id'] > 0: argd['recid'] = argd['id'] if argd['idb'] > 0: argd['recidb'] = argd['idb'] if argd['sysno']: tmp_recid = find_record_from_sysno(argd['sysno']) if tmp_recid: argd['recid'] = tmp_recid if argd['sysnb']: tmp_recid = find_record_from_sysno(argd['sysnb']) if tmp_recid: argd['recidb'] = tmp_recid if argd['recid'] > 0: if argd['recidb'] > argd['recid']: # Hack to check if among the restricted collections # at least a record of the range is there and # then if the user is not authorized for that # collection. 
recids = intbitset(xrange(argd['recid'], argd['recidb'])) restricted_collection_cache.recreate_cache_if_needed() for collname in restricted_collection_cache.cache: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code and user_info['email'] == 'guest': coll_recids = get_collection(collname).reclist if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') else: involved_collections.add(guess_primary_collection_of_a_record(argd['recid'])) # If any of the collection requires authentication, redirect # to the authentication form. for coll in involved_collections: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out def cache(self, req, form): """Search cache page.""" argd = wash_urlargd(form, {'action': (str, 'show')}) return perform_request_cache(req, action=argd['action']) def log(self, req, 
form): """Search log page.""" argd = wash_urlargd(form, {'date': (str, '')}) return perform_request_log(req, date=argd['date']) def authenticate(self, req, form): """Restricted search results pages.""" argd = wash_search_urlargd(form) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd uid = getUid(req) if uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out index = __call__ class WebInterfaceLegacySearchPages(WebInterfaceDirectory): """ Handling of the /search.py URL and its sub-pages. """ _exports = ['', ('authenticate', 'index')] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) # We either jump into the generic search form, or the specific # /record/... 
# Parameters for the legacy URLs, of the form /?c=ALEPH
legacy_collection_default_urlargd = {
    'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
    'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
    'verbose': (int, 0),
    'c': (str, CFG_SITE_NAME)}


class WebInterfaceSearchInterfacePages(WebInterfaceDirectory):
    """ Handling of collection navigation."""

    _exports = [('index.py', 'legacy_collection'),
                ('', 'legacy_collection'),
                ('search.py', 'legacy_search'),
                'search',
                'openurl',
                'opensearchdescription',
                'logout_SSO_hook']

    search = WebInterfaceSearchResultsPages()
    legacy_search = WebInterfaceLegacySearchPages()

    def logout_SSO_hook(self, req, form):
        """Script triggered by the display of the centralized SSO logout
        dialog. It logouts the user from Invenio and stream back the
        expected picture."""
        logoutUser(req)
        req.content_type = 'image/gif'
        req.encoding = None
        req.filename = 'wsignout.gif'
        req.headers_out["Content-Disposition"] = "inline; filename=wsignout.gif"
        req.set_content_length(os.path.getsize('%s/img/wsignout.gif' % CFG_WEBDIR))
        req.send_http_header()
        req.sendfile('%s/img/wsignout.gif' % CFG_WEBDIR)

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for
        collections and records)

        Returns a (handler, remaining_path) pair, or (None, []) when the
        URL should be answered with page-not-found.
        """
        if component == 'collection':
            c = '/'.join(path)

            def answer(req, form):
                """Accessing collections cached pages."""
                # Accessing collections: this is for accessing the
                # cached page on top of each collection.
                argd = wash_urlargd(form, search_interface_default_urlargd)

                # We simply return the cached page of the collection
                argd['c'] = c

                if not argd['c']:
                    # collection argument not present; display
                    # home collection by default
                    argd['c'] = CFG_SITE_NAME

                # Treat `as' argument specially:
                if argd.has_key('as'):
                    argd['aas'] = argd['as']
                    del argd['as']

                return display_collection(req, **argd)

            return answer, []

        elif component == 'record' and path and path[0] == 'merge':
            return WebInterfaceMergePages(), path[1:]

        elif component == 'record' and path and path[0] == 'edit':
            return WebInterfaceEditPages(), path[1:]

        elif component == 'record' and path and path[0] == 'multiedit':
            return WebInterfaceMultiEditPages(), path[1:]

        elif component == 'record' or component == 'record-restricted':
            try:
                if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                    # let us try to recognize /record/<SYSNO> style of URLs:
                    x = get_mysql_recid_from_aleph_sysno(path[0])
                    if x:
                        recid = x
                    else:
                        recid = int(path[0])
                else:
                    recid = int(path[0])
            except IndexError:
                # display record #1 for URL /record without a number
                recid = 1
            except ValueError:
                if path[0] == '':
                    # display record #1 for URL /record/ without a number
                    recid = 1
                else:
                    # display page not found for URLs like /record/foo
                    return None, []

            if recid <= 0:
                # display page not found for URLs like /record/-5 or /record/0
                return None, []

            # NOTE(review): 'format' shadows the builtin; kept as-is for
            # compatibility with WebInterfaceRecordPages' signature.
            format = None
            tab = ''
            try:
                if path[1] in ['', 'files', 'reviews', 'comments', 'usage',
                               'references', 'citations', 'holdings', 'edit',
                               'keywords', 'multiedit', 'merge', 'plots']:
                    tab = path[1]
                elif path[1] == 'export':
                    tab = ''
                    format = path[2]
#                    format = None
#                elif path[1] in output_formats:
#                    tab = ''
#                    format = path[1]
                else:
                    # display page not found for URLs like /record/references
                    # for a collection where 'references' tabs is not visible
                    return None, []
            except IndexError:
                # Keep normal url if tabs is not specified
                pass

            #if component == 'record-restricted':
                #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
            #else:
            return WebInterfaceRecordPages(recid, tab, format), path[1:]

        return None, []

    def openurl(self, req, form):
        """ OpenURL Handler."""
        argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args)
        ret_url = websearch_templates.tmpl_openurl2invenio(argd)
        if ret_url:
            return redirect_to_url(req, ret_url)
        else:
            # OpenURL could not be resolved; fall back to the site home.
            return redirect_to_url(req, CFG_SITE_URL)

    def opensearchdescription(self, req, form):
        """OpenSearch description file"""
        req.content_type = "application/opensearchdescription+xml"
        req.send_http_header()
        argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG),
                                   'verbose': (int, 0) })
        return websearch_templates.tmpl_opensearch_description(ln=argd['ln'])

    def legacy_collection(self, req, form):
        """Collection URL backward compatibility handling."""
        accepted_args = dict(legacy_collection_default_urlargd)
        argd = wash_urlargd(form, accepted_args)

        # Treat `as' argument specially:
        if argd.has_key('as'):
            argd['aas'] = argd['as']
            del argd['as']

        # If we specify no collection, then we don't need to redirect
        # the user, so that accessing <http://yoursite/> returns the
        # default collection.
        if not form.has_key('c'):
            return display_collection(req, **argd)

        # make the collection an element of the path, and keep the
        # other query elements as is. If the collection is CFG_SITE_NAME,
        # however, redirect to the main URL.
        c = argd['c']
        del argd['c']

        if c == CFG_SITE_NAME:
            target = '/'
        else:
            target = '/collection/' + quote(c)

        # Treat `as' argument specially:
        # We are going to redirect, so replace `aas' by `as' visible argument:
        if argd.has_key('aas'):
            argd['as'] = argd['aas']
            del argd['aas']

        target += make_canonical_urlargd(argd, legacy_collection_default_urlargd)
        return redirect_to_url(req, target)
c = argd['c'] del argd['c'] if c == CFG_SITE_NAME: target = '/' else: target = '/collection/' + quote(c) # Treat `as' argument specially: # We are going to redirect, so replace `aas' by `as' visible argument: if argd.has_key('aas'): argd['as'] = argd['aas'] del argd['aas'] target += make_canonical_urlargd(argd, legacy_collection_default_urlargd) return redirect_to_url(req, target) def display_collection(req, c, aas, verbose, ln): """Display search interface page for collection c by looking in the collection cache.""" _ = gettext_set_language(ln) req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln}, search_interface_default_urlargd) # get user ID: try: uid = getUid(req) user_preferences = {} if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this collection", navmenuid='search') elif uid > 0: user_preferences = get_user_preferences(uid) except Error: register_exception(req=req, alert_admin=True) return page(title=_("Internal Error"), body=create_error_box(req, verbose=verbose, ln=ln), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % CFG_SITE_NAME, language=ln, req=req, navmenuid='search') # start display: req.content_type = "text/html" req.send_http_header() # deduce collection id: colID = get_colID(c) if type(colID) is not int: page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>' page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>' if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND return page(title=_("Collection %s Not Found") % cgi.escape(c), body=page_body, description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))), keywords="%s" % CFG_SITE_NAME, uid=uid, language=ln, req=req, navmenuid='search') # wash `aas' argument: if not 
os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln)): # nonexistent `aas' asked for, fall back to Simple Search: aas = 0 # display collection interface page: try: filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_navtrail = filedesc.read() filedesc.close() except: c_navtrail = "" try: filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_body = filedesc.read() filedesc.close() except: c_body = "" try: filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_tp = filedesc.read() filedesc.close() except: c_portalbox_tp = "" try: filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_te = filedesc.read() filedesc.close() except: c_portalbox_te = "" try: filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_lt = filedesc.read() filedesc.close() except: c_portalbox_lt = "" try: # show help boxes (usually located in "tr", "top right") # if users have not banned them in their preferences: c_portalbox_rt = "" if user_preferences.get('websearch_helpbox', 1) > 0: filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_rt = filedesc.read() filedesc.close() except: c_portalbox_rt = "" try: filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_last_updated = filedesc.read() filedesc.close() except: c_last_updated = "" try: title = get_coll_i18nname(c, ln) except: title = "" show_title_p = True body_css_classes = [] if c == CFG_SITE_NAME: # Do not display title on home collection show_title_p = False body_css_classes.append('home') if len(collection_reclist_cache.cache.keys()) == 1: # if there is only one collection defined, do not print its # title on the page as it would be displayed 
repetitively. show_title_p = False if aas == -1: show_title_p = False # RSS: rssurl = CFG_SITE_URL + '/rss' rssurl_params = [] if c != CFG_SITE_NAME: rssurl_params.append('cc=' + quote(c)) if ln != CFG_SITE_LANG and \ c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS: rssurl_params.append('ln=' + ln) if rssurl_params: rssurl += '?' + '&'.join(rssurl_params) if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: metaheaderadd = """ <script src='/MathJax/MathJax.js' type='text/javascript'></script> """ else: metaheaderadd = '' return page(title=title, body=c_body, navtrail=c_navtrail, description="%s - %s" % (CFG_SITE_NAME, c), keywords="%s, %s" % (CFG_SITE_NAME, c), metaheaderadd=metaheaderadd, uid=uid, language=ln, req=req, cdspageboxlefttopadd=c_portalbox_lt, cdspageboxrighttopadd=c_portalbox_rt, titleprologue=c_portalbox_tp, titleepilogue=c_portalbox_te, lastupdated=c_last_updated, navmenuid='search', rssurl=rssurl, body_css_classes=body_css_classes, show_title_p=show_title_p) class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory): """RSS 2.0 feed service pages.""" def __call__(self, req, form): """RSS 2.0 feed service.""" # Keep only interesting parameters for the search default_params = websearch_templates.rss_default_urlargd # We need to keep 'jrec' and 'rg' here in order to have # 'multi-page' RSS. These parameters are not kept be default # as we don't want to consider them when building RSS links # from search and browse pages. 
default_params.update({'jrec':(int, 1), 'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)}) argd = wash_urlargd(form, default_params) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # Create a standard filename with these parameters current_url = websearch_templates.build_rss_url(argd) cache_filename = current_url.split('/')[-1] # In the same way as previously, add 'jrec' & 'rg' req.content_type = "application/rss+xml" req.send_http_header() try: # Try to read from cache path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) # Check if cache needs refresh filedesc = open(path, "r") last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime) assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)) c_rss = filedesc.read() filedesc.close() req.write(c_rss) return except Exception, e: # do it live and cache previous_url = None if argd['jrec'] > 1: prev_jrec = argd['jrec'] - argd['rg'] if prev_jrec < 1: prev_jrec = 1 previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec) recIDs = perform_request_search(req, of="id", c=argd['c'], cc=argd['cc'], p=argd['p'], f=argd['f'], p1=argd['p1'], f1=argd['f1'], m1=argd['m1'], op1=argd['op1'], p2=argd['p2'], f2=argd['f2'], m2=argd['m2'], op2=argd['op2'], p3=argd['p3'], f3=argd['f3'], m3=argd['m3']) nb_found = len(recIDs) next_url = None if len(recIDs) >= argd['jrec'] 
+ argd['rg']: next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg'])) first_url = websearch_templates.build_rss_url(argd, jrec=1) last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1) recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1] rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \ websearch_templates.tmpl_xml_rss_prologue(current_url=current_url, previous_url=previous_url, next_url=next_url, first_url=first_url, last_url=last_url, nb_found=nb_found, jrec=argd['jrec'], rg=argd['rg']) + '\n' req.write(rss_prologue) rss_body = format_records(recIDs, of='xr', ln=argd['ln'], user_info=user_info, record_separator="\n", req=req, epilogue="\n") rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n' req.write(rss_epilogue) # update cache dirname = "%s/rss" % (CFG_CACHEDIR) mymkdir(dirname) fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) try: # Remove the file just in case it already existed # so that a bit of space is created os.remove(fullfilename) except OSError: pass # Check if there's enough space to cache the request. if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS: try: os.umask(022) f = open(fullfilename, "w") f.write(rss_prologue + rss_body + rss_epilogue) f.close() except IOError, v: if v[0] == 36: # URL was too long. 
class WebInterfaceRecordExport(WebInterfaceDirectory):
    """ Handling of a /record/<recid>/export/<format> URL fragment """

    _exports = output_formats

    def __init__(self, recid, format=None):
        # record identifier to export
        self.recid = recid
        # requested export format code, or None to use the URL's 'of'
        self.format = format

        # Expose every output format name as an attribute pointing back to
        # this instance, so /export/<format> resolves to the same handler.
        for output_format in output_formats:
            self.__dict__[output_format] = self

        return

    def __call__(self, req, form):
        """Serve record self.recid formatted as self.format (if given)."""
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid

        if self.format is not None:
            argd['of'] = self.format

        req.argd = argd

        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # Check if the record belongs to a restricted primary
        # collection.  If yes, redirect to the authenticated URL.
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info):
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if out == []:
            return str(out)
        else:
            return out

    # Return the same page whether we ask for /record/123/export/xm or /record/123/export/xm/
    index = __call__