diff --git a/modules/bibauthorid/lib/bibauthorid_tables_utils.py b/modules/bibauthorid/lib/bibauthorid_tables_utils.py index b18329782..268825c4e 100644 --- a/modules/bibauthorid/lib/bibauthorid_tables_utils.py +++ b/modules/bibauthorid/lib/bibauthorid_tables_utils.py @@ -1,1617 +1,1617 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ bibauthorid_tables_utils Bibauthorid's DB handler """ import sys import re import random import bibauthorid_config as bconfig import bibauthorid_structs as dat from bibauthorid_utils import split_name_parts, create_normalized_name from bibauthorid_utils import clean_name_string from bibauthorid_authorname_utils import update_doclist try: from search_engine import get_record - from search_engine import get_fieldvalues + from search_engine_utils import get_fieldvalues from bibrank_citation_searcher import get_citation_dict from dbquery import run_sql, run_sql_many from dbquery import OperationalError, ProgrammingError except ImportError: - from invenio.search_engine import get_fieldvalues + from invenio.search_engine_utils import get_fieldvalues from invenio.search_engine import get_record from invenio.bibrank_citation_searcher import get_citation_dict from invenio.dbquery import run_sql, run_sql_many from invenio.dbquery import OperationalError, ProgrammingError try: import unidecode UNIDECODE_ENABLED = True except ImportError: bconfig.LOGGER.error("Authorid will run without unidecode support! " "This is not recommended! Please install unidecode!") UNIDECODE_ENABLED = False def get_papers_recently_modified(date='00-00-00 00:00:00'): ''' Returns the bibrecs whose modification date is more recent than the given date, or all bibrecs if no date is specified. @param date: the cut-off modification date ''' papers = run_sql("select id from bibrec where modification_date > %s", (str(date),)) if papers: bibrecs = [i[0] for i in papers] bibrecs.append(-1) min_date = run_sql("select max(modification_date) from bibrec where " "id in " + str(tuple(bibrecs))) else: min_date = run_sql("select now()") return papers, min_date def populate_authornames_bibrefs_from_authornames(): ''' Populates aidAUTHORNAMESBIBREFS. For each entry in aidAUTHORNAMES it creates a corresponding entry in aidAUTHORNAMESBIBREFS, so that searching by bibrec/bibref is possible at a reasonable speed as well, not only searching by name. ''' nids = run_sql("select id,bibrefs from aidAUTHORNAMES") for nid in nids: for bibref in nid[1].split(','): if bconfig.TABLES_UTILS_DEBUG: print ('populate_authornames_bibrefs_from_authornames: Adding: ' ' %s %s' % (str(nid[0]), str(bibref))) run_sql("insert into aidAUTHORNAMESBIBREFS (Name_id, bibref) " "values (%s,%s)", (str(nid[0]), str(bibref))) def authornames_tables_gc(bunch_size=50): ''' Performs garbage collection on the authornames tables. Potentially really slow.
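@param bunch_size: number of aidAUTHORNAMESBIBREFS rows to inspect per iteration (defaults to 50)
@type bunch_size: int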
''' bunch_start = run_sql("select min(id) from aidAUTHORNAMESBIBREFS") if len(bunch_start) >= 1: bunch_start = int(bunch_start[0][0]) else: return abfs_ids_bunch = run_sql("select id,Name_id,bibref from aidAUTHORNAMESBIBREFS limit " + str(bunch_start - 1) + "," + str(bunch_size)) bunch_start += bunch_size while len(abfs_ids_bunch) >= 1: bib100list = [] bib700list = [] for i in abfs_ids_bunch: if i[2].split(':')[0] == '100': bib100list.append(i[2].split(':')[1]) elif i[2].split(':')[0] == '700': bib700list.append(i[2].split(':')[1]) bib100liststr = '( ' for i in bib100list: bib100liststr += "'" + str(i) + "'," bib100liststr = bib100liststr[0:len(bib100liststr) - 1] + " )" bib700liststr = '( ' for i in bib700list: bib700liststr += "'" + str(i) + "'," bib700liststr = bib700liststr[0:len(bib700liststr) - 1] + " )" if len(bib100list) >= 1: bib10xids = run_sql("select id from bib10x where id in %s" % bib100liststr) else: bib10xids = [] if len(bib700list) >= 1: bib70xids = run_sql("select id from bib70x where id in %s" % bib700liststr) else: bib70xids = [] bib10xlist = [] bib70xlist = [] for i in bib10xids: bib10xlist.append(str(i[0])) for i in bib70xids: bib70xlist.append(str(i[0])) bib100junk = set(bib100list).difference(set(bib10xlist)) bib700junk = set(bib700list).difference(set(bib70xlist)) idsdict = {} for i in abfs_ids_bunch: idsdict[i[2]] = [i[0], i[1]] junklist = [] for i in bib100junk: junklist.append('100:' + i) for i in bib700junk: junklist.append('700:' + i) for junkref in junklist: try: id_to_remove = idsdict[junkref] run_sql("delete from aidAUTHORNAMESBIBREFS where id=%s", (str(id_to_remove[0]),)) if bconfig.TABLES_UTILS_DEBUG: print "authornames_tables_gc: idAUTHORNAMESBIBREFS deleting row " + str(id_to_remove) authrow = run_sql("select id,Name,bibrefs,db_name from aidAUTHORNAMES where id=%s", (str(id_to_remove[1]),)) if len(authrow[0][2].split(',')) == 1: run_sql("delete from aidAUTHORNAMES where id=%s", (str(id_to_remove[1]),)) if bconfig.TABLES_UTILS_DEBUG: print "authornames_tables_gc: aidAUTHORNAMES deleting " + str(authrow) else: bibreflist = '' for ref in authrow[0][2].split(','): if ref != junkref: bibreflist += ref + ',' bibreflist = bibreflist[0:len(bibreflist) - 1] run_sql("update aidAUTHORNAMES set bibrefs=%s where id=%s", (bibreflist, id_to_remove[1])) if bconfig.TABLES_UTILS_DEBUG: print "authornames_tables_gc: aidAUTHORNAMES updating " + str(authrow) + ' with ' + str(bibreflist) except (OperationalError, ProgrammingError, KeyError, IndexError, ValueError, TypeError): pass abfs_ids_bunch = run_sql("select id,Name_id,bibref from aidAUTHORNAMESBIBREFS limit " + str(bunch_start - 1) + ',' + str(bunch_size)) bunch_start += bunch_size def update_authornames_tables_from_paper(papers_list=None): """ Updates the authornames tables with the names on the given papers list @param papers_list: list of the papers which have been updated (bibrecs) ((1,),) For each paper of the list gathers all names, bibrefs and bibrecs to be added to aidAUTHORNAMES table, taking care of updating aidA.B. as well NOTE: update_authornames_tables_from_paper: just to remember: get record would be faster but we don't have the bibref there, maybe there is a way to rethink everything not to use bibrefs? How to address authors then? 
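Example (hypothetical bibrec IDs, in the ((1,),) shape noted above):
    update_authornames_tables_from_paper([(1,), (2,)])
With the default papers_list=None, every record in bibrec is scanned.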
""" def update_authornames_tables(name, bibref): ''' Update the tables for one bibref,name touple ''' authornames_row = run_sql("select id,Name,bibrefs,db_name from aidAUTHORNAMES where db_name like %s", (str(name),)) authornames_bibrefs_row = run_sql("select id,Name_id,bibref from aidAUTHORNAMESBIBREFS " "where bibref like %s", (str(bibref),)) #@XXX: update_authornames_tables: if i'm not wrong there should always be only one result; will be checked further on if ((len(authornames_row) > 1) or (len(authornames_bibrefs_row) > 1) or (len(authornames_row) < len(authornames_bibrefs_row))): if bconfig.TABLES_UTILS_DEBUG: print "update_authornames_tables: More then one result or missing authornames?? Something is wrong, not updating" + str(authornames_row) + str(authornames_bibrefs_row) return if len(authornames_row) == 1: # we have an hit for the name string; check if there is the 'new' bibref associated, # if yes there is nothing to do, otherwise shold add it here and in the ANbibrefs table if authornames_row[0][2].count(bibref) < 1: if bconfig.TABLES_UTILS_DEBUG: print 'update_authornames_tables: Adding new bibref to ' + str(authornames_row) + ' ' + str(name) + ' ' + str(bibref) run_sql("update aidAUTHORNAMES set bibrefs=%s where id=%s", (authornames_row[0][2] + ',' + str(bibref), authornames_row[0][0])) if len(authornames_bibrefs_row) < 1: # we have to add the bibref to the name, would be strange if it would be already there run_sql("insert into aidAUTHORNAMESBIBREFS (Name_id,bibref) values (%s,%s)", (authornames_row[0][0], str(bibref))) else: if bconfig.TABLES_UTILS_DEBUG: print 'update_authornames_tables: Nothing to add to ' + str(authornames_row) + ' ' + str(name) + ' ' + str(bibref) else: #@NOTE: update_authornames_tables: we don't have the name string in the db: the name associated to the bibref is changed # or this is a new name? Experimenting with bibulpload looks like if a name on a paper changes a new bibref is created; # if len(authornames_bibrefs_row) == 1: # If len(authornames_row) is zero but we have a row in authornames_bibrefs_row means that # the name string is changed, somehow! # @FIXME: update_authornames_tables: this case should really be considered? if bconfig.TABLES_UTILS_DEBUG: print 'update_authornames_tables: The name associated to the bibref is changed?? ' + str(name) + ' ' + str(bibref) else: artifact_removal = re.compile("[^a-zA-Z0-9]") authorname = "" test_name = name.decode('utf-8') if UNIDECODE_ENABLED: test_name = unidecode.unidecode(name.decode('utf-8')) raw_name = artifact_removal.sub("", test_name) if len(raw_name) > 1: authorname = name.decode('utf-8') if len(raw_name) > 1: dbname = authorname else: dbname = 'Error in name parsing!' 
clean_name = create_normalized_name(split_name_parts(name)) authornamesid = run_sql("insert into aidAUTHORNAMES (Name,bibrefs,db_name) values (%s,%s,%s)", (clean_name, str(bibref), dbname)) run_sql("insert into aidAUTHORNAMESBIBREFS (Name_id,bibref) values (%s,%s)", (authornamesid, str(bibref))) if bconfig.TABLES_UTILS_DEBUG: print 'update_authornames_tables: Created new name ' + str(authornamesid) + ' ' + str(name) + ' ' + str(bibref) tables = [['bibrec_bib10x', 'bib10x', '100__a', '100'], ['bibrec_bib70x', 'bib70x', '700__a', '700']] if not papers_list: papers_list = run_sql("select id from bibrec") for paper in papers_list: for table in tables: sqlstr = "select id_bibxxx from %s where id_bibrec=" % table[0] bibrefs = run_sql(sqlstr + "%s", (str(paper[0]),)) for ref in bibrefs: sqlstr = "select value from %s where tag='%s' and id=" % (table[1], table[2]) name = run_sql(sqlstr + "%s", (str(ref[0]),)) if len(name) >= 1: update_authornames_tables(name[0][0], table[3] + ':' + str(ref[0])) def populate_authornames(): """ Author names table population from bib10x and bib70x Average Runtime: 376.61 sec (6.27 min) for 327k entries Should be called only with empty table, then use update_authornames_tables_from_paper with the new papers which are coming in or modified. """ max_rows_per_run = bconfig.TABLE_POPULATION_BUNCH_SIZE if max_rows_per_run == -1: max_rows_per_run = 5000 max100 = run_sql("SELECT COUNT(id) FROM bib10x WHERE tag = '100__a'") max700 = run_sql("SELECT COUNT(id) FROM bib70x WHERE tag = '700__a'") tables = "bib10x", "bib70x" authornames_is_empty_checked = 0 authornames_is_empty = 1 # Bring author names from bib10x and bib70x to authornames table for table in tables: if table == "bib10x": table_number = "100" else: table_number = "700" querylimiter_start = 0 querylimiter_max = eval('max' + str(table_number) + '[0][0]') if bconfig.TABLES_UTILS_DEBUG: print "\nProcessing %s (%s entries):" % (table, querylimiter_max) sys.stdout.write("0% ") sys.stdout.flush() while querylimiter_start <= querylimiter_max: if bconfig.TABLES_UTILS_DEBUG: sys.stdout.write(".") sys.stdout.flush() percentage = int(((querylimiter_start + max_rows_per_run) * 100) / querylimiter_max) sys.stdout.write(".%s%%." % (percentage)) sys.stdout.flush() # Query the Database for a list of authors from the correspondent # tables--several thousands at a time bib = run_sql("SELECT id, value FROM %s WHERE tag = '%s__a' " "LIMIT %s, %s" % (table, table_number, querylimiter_start, max_rows_per_run)) authorexists = None querylimiter_start += max_rows_per_run for i in bib: # For mental sanity, exclude things that are not names... # Yes, I know that there are strange names out there! # Yes, I read the 40 misconceptions about names. # Yes, I know! # However, these statistical outlaws are harmful. 
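# Concretely, the filter below keeps an entry only if at least two alphanumeric characters survive the stripping: e.g. "A." or "-" would be dropped, while "O'Neil" passes as the residue "ONeil".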
artifact_removal = re.compile("[^a-zA-Z0-9]") authorname = "" if not i[1]: continue test_name = i[1].decode('utf-8') if UNIDECODE_ENABLED: test_name = unidecode.unidecode(i[1].decode('utf-8')) raw_name = artifact_removal.sub("", test_name) if len(raw_name) > 1: authorname = i[1].decode('utf-8') if not authorname: continue if not authornames_is_empty_checked: authornames_is_empty = run_sql("SELECT COUNT(id) " "FROM aidAUTHORNAMES") if authornames_is_empty[0][0] == 0: authornames_is_empty_checked = 1 authornames_is_empty = 1 if not authornames_is_empty: # Find duplicates in the database and append the bibref if # a duplicate is found authorexists = run_sql("SELECT id, name, bibrefs, db_name " "FROM aidAUTHORNAMES " "WHERE db_name = %s", (authorname.encode("utf-8"),)) bibrefs = "%s:%s" % (table_number, i[0]) if not authorexists: insert_name = "" if len(authorname) > 254: bconfig.LOGGER.warn("\nName too long, truncated to 254" " chars: %s" % (authorname)) insert_name = authorname[0:254] else: insert_name = authorname cnn = create_normalized_name snp = split_name_parts aid_name = authorname if UNIDECODE_ENABLED: aid_name = cnn(snp(unidecode.unidecode(insert_name))) aid_name = aid_name.replace("\"", "") else: aid_name = cnn(snp(insert_name)) aid_name = aid_name.replace(u"\u201c", "") aid_name = aid_name.replace(u"\u201d", "") run_sql("INSERT INTO aidAUTHORNAMES VALUES" " (NULL, %s, %s, %s)", (aid_name.encode('utf-8'), bibrefs, insert_name.encode('utf-8'))) if authornames_is_empty: authornames_is_empty = 0 else: # append the bibref only if it is not attached to this name yet if authorexists[0][2].count(bibrefs) < 1: upd_bibrefs = "%s,%s" % (authorexists[0][2], bibrefs) run_sql("UPDATE aidAUTHORNAMES SET bibrefs = " "%s WHERE id = %s", (upd_bibrefs, authorexists[0][0])) if bconfig.TABLES_UTILS_DEBUG: sys.stdout.write(" Done.") sys.stdout.flush() def get_bibref_name_string(bibref): ''' Returns the name string associated with the given bibref @param: bibref ((100:123,),) ''' name = run_sql("select db_name from aidAUTHORNAMES where id=(select Name_id from aidAUTHORNAMESBIBREFS where bibref=%s)", (str(bibref[0][0]),)) if len(name) > 0: return name[0][0] else: return '' def get_bibrefs_from_name_string(string): ''' Returns the bibrefs associated with a name string @param: string: name ''' bibrefs = run_sql("select bibrefs from aidAUTHORNAMES where db_name=%s ", (str(string),)) return bibrefs def get_diff_marc10x70x_to_anames(): ''' Determines the difference between the union of bib10x and bib70x and the aidAUTHORNAMES table. It will return the entries which are present in bib10x and bib70x but not in aidAUTHORNAMES. Meant to be run periodically. @todo: get_diff_marc10x70x_to_anames: find meaningful use for the returned results. @return: a list of the author names not contained in the authornames table @rtype: list ''' run_sql("DROP VIEW IF EXISTS authors") run_sql("create view authors AS \ (SELECT value FROM bib10x WHERE tag =\"100__a\") \ UNION \ (SELECT value FROM bib70x WHERE tag =\"700__a\")") diff = run_sql("SELECT value from authors LEFT JOIN aidAUTHORNAMES as b" + " ON (authors.value = b.Name) WHERE b.name IS NULL") return diff def populate_doclist_for_author_surname(surname, surname_variations=None): """ Searches for all the documents containing a given surname and processes them: creates the virtual author for each author on a document. @param surname: The search is based on this last name.
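(e.g. "Ellis" picks up all author name strings starting with that last name)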
@type surname: string """ # if not dat.CITES_DICT: # cites = get_citation_dict("citationdict") # # for key in cites: # dat.CITES_DICT[key] = cites[key] # # if not dat.CITED_BY_DICT: # cited_by = get_citation_dict("reversedict") # # for key in cited_by: # dat.CITED_BY_DICT[key] = cited_by[key] bconfig.LOGGER.log(25, "Populating document list for %s" % (surname)) if surname_variations: init_authornames(surname, surname_variations) else: init_authornames(surname) authors = [row for row in dat.AUTHOR_NAMES if not row['processed']] for author in authors: marc_100 = [] marc_700 = [] temp_marc = author['bibrefs'].split(',') for j in temp_marc: marcfield, internalid = j.split(':') if marcfield == '100': marc_100.append(internalid) elif marcfield == '700': marc_700.append(internalid) else: bconfig.LOGGER.error("Wrong MARC field. How did you do" " that?!--This should never happen! boo!") bibrecs = [] if marc_100: for m100 in marc_100: refinfo = run_sql("SELECT id_bibrec FROM bibrec_bib10x " "WHERE id_bibxxx = %s", (m100,)) if refinfo: for recid in refinfo: bibrecs.append((recid[0], "100:%s" % m100)) if marc_700: for m700 in marc_700: refinfo = run_sql("SELECT id_bibrec FROM bibrec_bib70x " "WHERE id_bibxxx = %s", (m700,)) if refinfo: for recid in refinfo: bibrecs.append((recid[0], "700:%s" % m700)) relevant_records = [] for bibrec in bibrecs: go_next = False for value in get_fieldvalues(bibrec[0], "980__c"): if value.lower().count('delete'): go_next = True if go_next: continue for value in get_fieldvalues(bibrec[0], "980__a"): if value.lower().count('delet'): go_next = True if bconfig.EXCLUDE_COLLECTIONS: if value in bconfig.EXCLUDE_COLLECTIONS: go_next = True break if bconfig.LIMIT_TO_COLLECTIONS: if not value in bconfig.LIMIT_TO_COLLECTIONS: go_next = True else: go_next = False break if go_next: continue relevant_records.append(bibrec) if load_records_to_mem_cache([br[0] for br in relevant_records]): for bibrec in relevant_records: update_doclist(bibrec[0], author['id'], bibrec[1]) def load_records_to_mem_cache(bibrec_ids): ''' Loads all the records specified in the list into the memory storage facility. It will try to attach citation information to each record in the process. 
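If bconfig.LIMIT_AUTHORS_PER_DOCUMENT is set, records with more authors than bconfig.MAX_AUTHORS_PER_DOCUMENT are skipped, unless the record belongs to a collaboration (a 710__g field is present).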
@param bibrec_ids: list of bibrec IDs to load to memory @type bibrec_ids: list @return: Success (True) or failure (False) of the process @rtype: boolean ''' if not bibrec_ids: return False for bibrec in bibrec_ids: if not bibrec in dat.RELEVANT_RECORDS: rec = get_record(bibrec) if bconfig.LIMIT_AUTHORS_PER_DOCUMENT: is_collaboration = False authors = 0 try: for field in rec['710'][0][0]: if field[0] == 'g': is_collaboration = True break except KeyError: pass if is_collaboration: # If experimentalists shall be excluded uncomment # the following line #continue pass else: try: for field in rec['100'][0][0]: if field[0] == 'a': authors += 1 break except KeyError: pass try: for coauthor in rec['700']: if coauthor[0][0][0] == 'a': authors += 1 except KeyError: pass if authors > bconfig.MAX_AUTHORS_PER_DOCUMENT: continue dat.RELEVANT_RECORDS[bibrec] = rec cites = [] cited_by = [] try: # cites = dat.CITES_DICT[bibrec] cites = get_citation_dict("citationdict")[bibrec] except KeyError: pass try: # cited_by = dat.CITED_BY_DICT[bibrec] cited_by = get_citation_dict("reversedict")[bibrec] except KeyError: pass dat.RELEVANT_RECORDS[bibrec]['cites'] = cites dat.RELEVANT_RECORDS[bibrec]['cited_by'] = cited_by return True def init_authornames(surname, lastname_variations=None): ''' Initializes the AUTHOR_NAMES memory storage @param surname: The surname to search for @type surname: string ''' if len(dat.AUTHOR_NAMES) > 0: existing = [row for row in dat.AUTHOR_NAMES if row['name'].split(",")[0] == surname] if existing: bconfig.LOGGER.log(25, "AUTHOR_NAMES already holds the " "correct data.") else: bconfig.LOGGER.debug("AUTHOR_NAMES will have additional content") for updated in [row for row in dat.AUTHOR_NAMES if not row['processed']]: updated['processed'] = True _perform_authornames_init(surname) else: _perform_authornames_init(surname, lastname_variations) def _perform_authornames_init(surname, lastname_variations=None): ''' Performs the actual AUTHOR_NAMES memory storage init by reading values from the database @param surname: The surname to search for @type surname: string ''' db_authors = None if len(surname) < 4 and not lastname_variations: lastname_variations = [surname] if (not lastname_variations or (lastname_variations and [nm for nm in lastname_variations if nm.count("\\")])): sql_query = (r"SELECT id, name, bibrefs, db_name FROM aidAUTHORNAMES " "WHERE name REGEXP %s") if (lastname_variations and [nm for nm in lastname_variations if nm.count("\\")]): x = sorted(lastname_variations, key=lambda k:len(k), reverse=True) # In order to fight escaping problems, we fall back to regexp mode # if we find a backslash somewhere. surname = x[0] # instead of replacing with ' ', this will construct the regex for the # SQL query as well as the next if statements. 
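# For illustration: assuming clean_name_string() substitutes each non-alphanumeric character with the replacement pattern, a variation like "O'Hara" should come out roughly as the MySQL regexp "^[^0-9a-zA-Z]{0,2}O[^0-9a-zA-Z]{0,2}Hara[^0-9a-zA-Z ]{1,2}".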
surname = clean_name_string(surname, replacement="[^0-9a-zA-Z]{0,2}", keep_whitespace=False) if not surname.startswith("[^0-9a-zA-Z]{0,2}"): surname = "[^0-9a-zA-Z]{0,2}%s" % (surname) if not surname.startswith("^"): surname = "^%s" % surname surname = surname + "[^0-9a-zA-Z ]{1,2}" if surname.count("\\"): surname = surname.replace("\\", ".") try: db_authors = run_sql(sql_query, (surname,)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.exception("Not able to select author name: %s" % emsg) else: qnames = [] vari_query = "" for vname in lastname_variations: if vari_query: vari_query += " OR" vari_query += ' name like %s' vname_r = r"""%s""" % vname qnames.append(vname_r + ", %") if not vari_query: return sql_query = ("SELECT id, name, bibrefs, db_name " "FROM aidAUTHORNAMES WHERE" + vari_query) try: db_authors = run_sql(sql_query, tuple(qnames)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.exception("Not able to select author name: %s" % emsg) if not db_authors: return for author in db_authors: dat.AUTHOR_NAMES.append({'id': author[0], 'name': author[1].decode('utf-8'), 'bibrefs': author[2], 'db_name': author[3].decode('utf-8'), 'processed': False}) def find_all_last_names(): ''' Extracts the last names from all names in the database. @return: a list of last names @rtype: list of strings ''' all_names = run_sql("SELECT Name FROM aidAUTHORNAMES") last_names = set() for dbname in all_names: if not dbname: continue full_name = dbname[0] name = split_name_parts(full_name.decode('utf-8'))[0] # For mental sanity, exclude things that are not names... # - Single letter names # - Single number names # - Names containing only numbers and/or symbols # Yes, I know that there are strange names out there! # Yes, I read the 40 misconceptions about names. # Yes, I know! # However, these statistical outlaws are harmful to the data set. artifact_removal = re.compile("[^a-zA-Z0-9]") authorname = None test_name = name if UNIDECODE_ENABLED: test_name = unidecode.unidecode(name) raw_name = artifact_removal.sub("", test_name) if len(raw_name) > 1: authorname = name if not authorname: continue if len(raw_name) > 1: last_names.add(authorname) del(all_names) return list(last_names) def write_mem_cache_to_tables(sanity_checks=False): ''' Reads every memory cache and writes its contents to the appropriate table in the database. @param sanity_checks: Perform sanity checks before inserting (i.e. is the data already present in the db?) and after the insertion (i.e. is the data entered correctly?) @type sanity_checks: boolean ''' ra_id_offset = run_sql("SELECT max(realauthorID) FROM" + " aidREALAUTHORS")[0][0] va_id_offset = run_sql("SELECT max(virtualauthorID) FROM" + " aidVIRTUALAUTHORS")[0][0] cluster_id_offset = run_sql("SELECT max(id) FROM" " aidVIRTUALAUTHORSCLUSTERS")[0][0] if not ra_id_offset: ra_id_offset = 0 if not va_id_offset: va_id_offset = 0 if not cluster_id_offset: cluster_id_offset = 0 max_va_id = dat.ID_TRACKER["va_id_counter"] if max_va_id <= 1: max_va_id = 2 random_va_id = random.randint(1, max_va_id - 1) va_mem_data = [row['value'] for row in dat.VIRTUALAUTHOR_DATA if (row["virtualauthorid"] == random_va_id and row['tag'] == "orig_authorname_id")][0] if sanity_checks: if va_mem_data: check_on_va = run_sql("SELECT id,virtualauthorID,tag,value FROM aidVIRTUALAUTHORSDATA " "WHERE tag='orig_authorname_id' AND " "value=%s" , (va_mem_data,)) if check_on_va: bconfig.LOGGER.error("Sanity check reported that the data " "exists. We'll skip this record for now.
" "Please check the data set manually.") return False bconfig.LOGGER.log(25, "Writing to persistence layer") bconfig.LOGGER.log(25, "Offsets...RA: %s; VA: %s; CL: %s" % (ra_id_offset, va_id_offset, cluster_id_offset)) # batch_max = bconfig.TABLE_POPULATION_BUNCH_SIZE query = [] query_prelude = ("INSERT INTO aidVIRTUALAUTHORSCLUSTERS (cluster_name)" " VALUES (%s)") for va_cluster in dat.VIRTUALAUTHOR_CLUSTERS: encoded_value = None not_encoded_value = va_cluster['clustername'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:59].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:59] else: encoded_value = str(not_encoded_value)[0:59] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("Cluster Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((encoded_value,)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "cluster table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidVIRTUALAUTHORSDATA " "(virtualauthorID, tag, value) VALUES " "(%s, %s, %s)") for va_data in dat.VIRTUALAUTHOR_DATA: encoded_value = None not_encoded_value = va_data['value'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:254].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:254] else: encoded_value = str(not_encoded_value)[0:254] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("VA Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((va_data['virtualauthorid'] + va_id_offset, va_data['tag'], encoded_value)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "data table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidVIRTUALAUTHORS " "(virtualauthorID, authornamesID, p, clusterID) " "VALUES (%s, %s, %s, %s)") for va_entry in dat.VIRTUALAUTHORS: query.append((va_entry['virtualauthorid'] + va_id_offset, va_entry['authornamesid'], va_entry['p'], va_entry['clusterid'] + cluster_id_offset)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidREALAUTHORDATA " "(realauthorID, tag, value, va_count, " "va_names_p, va_p) VALUES " "(%s, %s, %s, %s, %s, %s)") for ra_data in dat.REALAUTHOR_DATA: if not ra_data['tag'] == 'outgoing_citation': encoded_value = None not_encoded_value = ra_data['value'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:254].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:254] else: encoded_value = str(not_encoded_value)[0:254] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("RA Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((ra_data['realauthorid'] + ra_id_offset, ra_data['tag'], encoded_value, ra_data['va_count'], ra_data['va_np'], ra_data['va_p'])) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into real author " "data 
table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidREALAUTHORS " "(realauthorID, virtualauthorID, p) VALUES (%s, %s, %s)") for ra_entry in dat.REALAUTHORS: query.append((ra_entry['realauthorid'] + ra_id_offset, ra_entry['virtualauthorid'] + va_id_offset, ra_entry['p'])) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into real author " "table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidDOCLIST " "(bibrecID, processed_author) VALUES (%s, %s)") for doc in dat.DOC_LIST: for processed_author in doc['authornameids']: query.append((doc['bibrecid'], processed_author)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into doc list " "table failed: %s" % emsg) return False query = [] if sanity_checks: if va_mem_data: check_on_va = run_sql("SELECT id,virtualauthorID,tag,value FROM aidVIRTUALAUTHORSDATA " "WHERE tag='orig_authorname_id' AND " "value=%s" , (va_mem_data,)) if not check_on_va: bconfig.LOGGER.error("Sanity check reported that no data " " exists in the database after writing " " to it.") return False bconfig.LOGGER.log(25, "Everything is now written to the database. " "Thanks. Bye.") return True def get_existing_last_names(): ''' Find all authors that have been processed and written to the database. Extract all last names from this list and return these last names. Especially helpful to exclude these clusters (last names) from a run. @return: list of last names @rtype: list of strings ''' bconfig.LOGGER.log(25, "Reading info about existing authors from database") db_lnames = set() db_names = run_sql("select value from aidVIRTUALAUTHORSDATA where" + " tag='orig_name_string'") for i in db_names: db_lnames.add(i[0].split(',')[0]) del(db_names) return list(db_lnames) def get_len_authornames_bibrefs(): ''' Reads the number of rows in the authornames and bibrefs tables. Used to determine if the essential tables already exist. @return: dict({'names':-1, 'bibrefs':-1}) @rtype: dict ''' lengths = {'names':-1, 'bibrefs':-1} if check_and_create_aid_tables(): authornames_len = run_sql("SELECT count(id) from aidAUTHORNAMES") bibrefs_len = run_sql("SELECT count(id) from aidAUTHORNAMESBIBREFS") try: lengths['names'] = int(authornames_len[0][0]) lengths['bibrefs'] = int(bibrefs_len[0][0]) except (ValueError, TypeError): lengths['names'] = -1 lengths['bibrefs'] = -1 return lengths def check_and_create_aid_tables(): ''' Checks if the database tables for Bibauthorid exist. (Despite its name, the current implementation only checks; it does not create the tables.) @return: True if the tables exist, False if not or on error @rtype: boolean ''' try: if not run_sql("show tables like 'aidAUTHORNAMES';"): return False except (ProgrammingError, OperationalError): return False return True def load_mem_cache_from_tables(): ''' Loads database content for an author's last name cluster into the memory storage facility.
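Starting from the authorname IDs already in memory, it collects the attached virtual authors, expands to the real authors they belong to, and then loads cluster, data, real author and doclist rows for everything found.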
@precondition: memory storage facility needs to be loaded with respective authornames data (init_authornames(lastname)) @return: Success (True) or failure (False) of the loading process @rtype: boolean ''' # print "check for authornames mem table" if not dat.AUTHOR_NAMES: return False authornames_ids = [row['id'] for row in dat.AUTHOR_NAMES] if not authornames_ids: return False # print "Building offsets" ra_id_offset = run_sql("SELECT max(realauthorID) FROM" " aidREALAUTHORS")[0][0] va_id_offset = run_sql("SELECT max(virtualauthorID) FROM" " aidVIRTUALAUTHORS")[0][0] cluster_id_offset = run_sql("SELECT max(id) FROM" " aidVIRTUALAUTHORSCLUSTERS")[0][0] dat.set_tracker("raid_counter", ra_id_offset + 1) dat.set_tracker("va_id_counter", va_id_offset + 1) dat.set_tracker("cluster_id", cluster_id_offset + 1) # print "working on authornames ids..." for authornames_id in authornames_ids: db_vas = run_sql("SELECT virtualauthorid, authornamesid, p, clusterid " "from aidVIRTUALAUTHORS WHERE authornamesid = %s", (authornames_id,)) # print "loading VAs for authid %s" % authornames_id db_vas_set = set([row[0] for row in db_vas]) if not db_vas_set: db_vas_set = (-1, -1) else: db_vas_set.add(-1) db_vas_tuple = tuple(db_vas_set) db_ras = run_sql("SELECT realauthorid FROM " "aidREALAUTHORS WHERE virtualauthorid in %s" , (tuple(db_vas_tuple),)) if db_ras: db_ras_set = set([row[0] for row in db_ras]) db_ras_set.add(-1) db_ras_tuple = tuple(db_ras_set) db_ra_vas = run_sql("SELECT virtualauthorid FROM aidREALAUTHORS " "WHERE realauthorid in %s", (db_ras_tuple,)) db_ra_vas_set = set([row[0] for row in db_ra_vas]) db_ra_vas_set.add(-1) db_ras_tuple = tuple(db_ra_vas_set) db_vas_all = run_sql("SELECT virtualauthorid, authornamesid, p, " "clusterid FROM aidVIRTUALAUTHORS WHERE " "virtualauthorid in %s", (db_ras_tuple,)) else: db_vas_all = db_vas for db_va in db_vas_all: dat.VIRTUALAUTHORS.append({'virtualauthorid': db_va[0], 'authornamesid': db_va[1], 'p': db_va[2], 'clusterid': db_va[3]}) if not dat.VIRTUALAUTHORS: # print "No Virtual Authors loaded. None created before." 
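# An empty memory cache is a legitimate state for a first run on this cluster, so report success rather than an error.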
return True # print "Loading clusters" cluster_ids = set([row['clusterid'] for row in dat.VIRTUALAUTHORS]) if not cluster_ids: cluster_ids = (-1, -1) else: cluster_ids.add(-1) db_va_clusters = run_sql("SELECT id, cluster_name FROM " "aidVIRTUALAUTHORSCLUSTERS WHERE id in %s" , (tuple(cluster_ids),)) # print "Storing clusters" for db_va_cluster in db_va_clusters: dat.VIRTUALAUTHOR_CLUSTERS.append({'clusterid': db_va_cluster[0], 'clustername': db_va_cluster[1]}) # print "Loading VA data" va_ids = set([row['virtualauthorid'] for row in dat.VIRTUALAUTHORS]) if not va_ids: va_ids = (-1, -1) else: va_ids.add(-1) # print "Storing VA data" db_va_data = run_sql("SELECT virtualauthorid, tag, value FROM " "aidVIRTUALAUTHORSDATA WHERE virtualauthorid in %s" , (tuple(va_ids),)) for db_va_dat in db_va_data: dat.VIRTUALAUTHOR_DATA.append({'virtualauthorid' : db_va_dat[0], 'tag': db_va_dat[1], 'value': db_va_dat[2]}) # print "Loading RAs" db_ras = run_sql("SELECT realauthorid, virtualauthorid, p FROM " "aidREALAUTHORS WHERE virtualauthorid in %s" , (tuple(va_ids),)) # print "Storing RAs" for db_ra in db_ras: dat.REALAUTHORS.append({'realauthorid': db_ra[0], 'virtualauthorid': db_ra[1], 'p': db_ra[2]}) # print "Loading RA data" ra_ids = set([row['realauthorid'] for row in dat.REALAUTHORS]) if not ra_ids: ra_ids = (-1, -1) else: ra_ids.add(-1) db_ra_data = run_sql("SELECT realauthorid, tag, value, va_count, " "va_names_p, va_p FROM aidREALAUTHORDATA WHERE " "realauthorid in %s", (tuple(ra_ids),)) # print "Storing RA data" for db_ra_dat in db_ra_data: dat.REALAUTHOR_DATA.append({'realauthorid': db_ra_dat[0], 'tag': db_ra_dat[1], 'value': db_ra_dat[2], 'va_count': db_ra_dat[3], 'va_np': db_ra_dat[4], 'va_p': db_ra_dat[5]}) # print "Loading doclist entries" bibrec_ids = set([int(row['value']) for row in dat.REALAUTHOR_DATA if row['tag'] == "bibrec_id"]) if not bibrec_ids: bibrec_ids = (-1, -1) else: bibrec_ids.add(-1) db_doclist = run_sql("SELECT bibrecid, processed_author FROM aidDOCLIST " "WHERE bibrecid in %s", (tuple(bibrec_ids),)) # print "Storing doclist entries" for db_doc in db_doclist: existing_item = [row for row in dat.DOC_LIST if row['bibrecid'] == db_doc[0]] if existing_item: for update in [row for row in dat.DOC_LIST if row['bibrecid'] == db_doc[0]]: if not db_doc[1] in update['authornameids']: update['authornameids'].append(db_doc[1]) else: dat.DOC_LIST.append({'bibrecid': db_doc[0], 'authornameids': [db_doc[1]]}) # drop the -1 sentinel added above before loading the records records_to_load = set(bibrec_ids) records_to_load.discard(-1) if records_to_load: # print "will load recs" if not load_records_to_mem_cache(list(records_to_load)): # print" FAILED loading records" return False return True def update_tables_from_mem_cache(sanity_checks=False, return_ra_updates=False): ''' Updates the tables in the database with the information in the memory storage while taking into account only changed data to optimize the time needed for the update. @param sanity_checks: Perform sanity checks while updating--slows down the process but might detect mistakes and prevent harm. Default: False @type sanity_checks: boolean @param return_ra_updates: Will force the method to return a list of real author ids that have been updated.
Default: False @type return_ra_updates: boolean @return: Either True if update went through without trouble or False if it did not and a list of updated real authors or an empty list @rtype: tuple of (boolean, list) ''' del_ra_ids = set([-1]) del_va_ids = dat.UPDATES_LOG['deleted_vas'].union( dat.UPDATES_LOG['touched_vas']) if del_va_ids: del_va_ids.add(-1) del_ra_ids_db = run_sql("SELECT realauthorid FROM aidREALAUTHORS " "WHERE virtualauthorid in %s" , (tuple(del_va_ids),)) for ra_id in del_ra_ids_db: del_ra_ids.add(ra_id[0]) if sanity_checks: va_count_db = run_sql("SELECT COUNT(DISTINCT virtualauthorid) " "FROM aidVIRTUALAUTHORS WHERE " "virtualauthorid in %s" , (tuple(del_va_ids),)) try: va_count_db = int(va_count_db[0][0]) except (ValueError, IndexError, TypeError): bconfig.LOGGER.exception("Error while reading number of " "virtual authors in database") va_count_db = -1 if not (va_count_db == len(del_va_ids)): bconfig.LOGGER.error("Sanity checks reported that the number " "of virtual authors in the memory " "storage is not equal to the number of " "virtual authors in the database. " "Aborting update mission.") return (False, []) bconfig.LOGGER.log(25, "Removing updated entries from " "persistence layer") run_sql("DELETE FROM aidVIRTUALAUTHORSDATA " "WHERE virtualauthorid in %s", (tuple(del_va_ids),)) run_sql("DELETE FROM aidVIRTUALAUTHORS " "WHERE virtualauthorid in %s", (tuple(del_va_ids),)) if len(tuple(del_ra_ids)) > 1: run_sql("DELETE FROM aidREALAUTHORDATA " "WHERE realauthorid in %s", (tuple(del_ra_ids),)) run_sql("DELETE FROM aidREALAUTHORS " "WHERE realauthorid in %s", (tuple(del_ra_ids),)) insert_ra_ids = dat.UPDATES_LOG['new_ras'].union(del_ra_ids) insert_va_ids = dat.UPDATES_LOG['new_vas'].union( dat.UPDATES_LOG['touched_vas']) bconfig.LOGGER.log(25, "Writing to persistence layer") ra_id_db_max = run_sql("SELECT max(realauthorID) FROM" " aidREALAUTHORS")[0][0] va_id_db_max = run_sql("SELECT max(virtualauthorID) FROM" " aidVIRTUALAUTHORS")[0][0] cluster_id_db_max = run_sql("SELECT max(id) FROM" " aidVIRTUALAUTHORSCLUSTERS")[0][0] if not ra_id_db_max or not va_id_db_max or not cluster_id_db_max: return (False, []) new_clusters = [row for row in dat.VIRTUALAUTHOR_CLUSTERS if row['clusterid'] > cluster_id_db_max] query = [] if not insert_ra_ids or not insert_va_ids: bconfig.LOGGER.log(25, "Saving update to persistence layer finished " "with success! 
(There was nothing to do)") return (True, []) query_prelude = ("INSERT INTO aidVIRTUALAUTHORSCLUSTERS (cluster_name)" " VALUES (%s)") for va_cluster in new_clusters: encoded_value = None not_encoded_value = va_cluster['clustername'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:59].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:59] else: encoded_value = str(not_encoded_value)[0:59] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("Cluster Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((encoded_value,)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "cluster table failed: %s" % emsg) return False query = [] va_data_to_insert = [row for row in dat.VIRTUALAUTHOR_DATA if row['virtualauthorid'] in insert_va_ids] if sanity_checks: db_existing_va_ids = run_sql("SELECT COUNT(DISTINCT virtualauthorid) " "WHERE virtualauthorid in %s" , (tuple(insert_va_ids),)) try: db_existing_va_ids = int(va_count_db[0][0]) except (ValueError, IndexError, TypeError): bconfig.LOGGER.exception("Error while reading number of " "virtual authors in database") db_existing_va_ids = -1 if not (db_existing_va_ids == 0): bconfig.LOGGER.error("Sanity checks reported that the " "virtual authors in the memory storage " "that shall be inserted already exist " "in the database. Aborting update mission.") return (False, []) query_prelude = ("INSERT INTO aidVIRTUALAUTHORSDATA " "(virtualauthorID, tag, value) VALUES " "(%s, %s, %s)") for va_data in va_data_to_insert: encoded_value = None not_encoded_value = va_data['value'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:254].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:254] else: encoded_value = str(not_encoded_value)[0:254] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("VA Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((va_data['virtualauthorid'], va_data['tag'], encoded_value)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "data table failed: %s" % emsg) return False query = [] vas_to_insert = [row for row in dat.VIRTUALAUTHORS if row['virtualauthorid'] in insert_va_ids] query_prelude = ("INSERT INTO aidVIRTUALAUTHORS " "(virtualauthorID, authornamesID, p, clusterID) " "VALUES (%s, %s, %s, %s)") for va_entry in vas_to_insert: query.append((va_entry['virtualauthorid'], va_entry['authornamesid'], va_entry['p'], va_entry['clusterid'])) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into virtual author " "table failed: %s" % emsg) return False query = [] if sanity_checks: db_existing_ra_ids = run_sql("SELECT COUNT(DISTINCT realauthorid) " "WHERE realauthorid in %s" , (tuple(insert_ra_ids),)) try: db_existing_ra_ids = int(db_existing_ra_ids[0][0]) except (ValueError, IndexError, TypeError): bconfig.LOGGER.exception("Error while reading number of " "real authors in database") db_existing_va_ids = -1 if not (db_existing_ra_ids == 0): bconfig.LOGGER.error("Sanity checks reported that the " "real authors in the memory storage " "that shall be inserted already 
exist " "in the database. Aborting update mission.") return (False, []) ra_data_to_insert = [row for row in dat.REALAUTHOR_DATA if row['realauthorid'] in insert_ra_ids] query_prelude = ("INSERT INTO aidREALAUTHORDATA " "(realauthorID, tag, value, va_count, " "va_names_p, va_p) VALUES " "(%s, %s, %s, %s, %s, %s)") for ra_data in ra_data_to_insert: if not ra_data['tag'] == 'outgoing_citation': encoded_value = None not_encoded_value = ra_data['value'] try: if isinstance(not_encoded_value, unicode): encoded_value = not_encoded_value[0:254].encode('utf-8') elif isinstance(not_encoded_value, str): encoded_value = not_encoded_value[0:254] else: encoded_value = str(not_encoded_value)[0:254] except (UnicodeEncodeError, UnicodeDecodeError), emsg: bconfig.LOGGER.error("RA Data encoding error (%s): %s" % (type(not_encoded_value), emsg)) continue query.append((ra_data['realauthorid'], ra_data['tag'], encoded_value, ra_data['va_count'], ra_data['va_np'], ra_data['va_p'])) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into real author " "data table failed: %s" % emsg) return False query = [] query_prelude = ("INSERT INTO aidREALAUTHORS " "(realauthorID, virtualauthorID, p) VALUES (%s, %s, %s)") ras_to_insert = [row for row in dat.REALAUTHORS if row['realauthorid'] in insert_ra_ids] for ra_entry in ras_to_insert: query.append((ra_entry['realauthorid'], ra_entry['virtualauthorid'], ra_entry['p'])) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into real author " "table failed: %s" % emsg) return False query = [] if sanity_checks: db_existing_ra_ids = run_sql("SELECT COUNT(DISTINCT realauthorid) " "WHERE realauthorid in %s" , (tuple(insert_ra_ids),)) try: db_existing_ra_ids = int(db_existing_ra_ids[0][0]) except (ValueError, IndexError, TypeError): bconfig.LOGGER.exception("Error while reading number of " "real authors in database") db_existing_ra_ids = -1 if not (db_existing_ra_ids == len(insert_ra_ids)): bconfig.LOGGER.error("Sanity checks reported that the number of" "real authors in the memory storage " "that shall be inserted is not equal to " "the number of real authors now " "in the database. Aborting update mission.") return (False, []) recid_updates = dat.UPDATES_LOG["rec_updates"] if recid_updates: recid_updates.add(-1) run_sql("DELETE FROM aidDOCLIST WHERE bibrecid in %s" , (tuple(recid_updates),)) doclist_insert = [row for row in dat.DOC_LIST if row['bibrecid'] in dat.UPDATES_LOG["rec_updates"]] query_prelude = ("INSERT INTO aidDOCLIST " "(bibrecID, processed_author) VALUES (%s, %s)") for doc in doclist_insert: for processed_author in doc['authornameids']: query.append((doc['bibrecid'], processed_author)) if query: try: run_sql_many(query_prelude, tuple(query)) except (OperationalError, ProgrammingError), emsg: bconfig.LOGGER.critical("Inserting into doc list " "table failed: %s" % emsg) return False query = [] bconfig.LOGGER.log(25, "Saving update to persistence layer finished " "with success!") if return_ra_updates: ra_ids = [[row['realauthorid']] for row in ras_to_insert] return (True, ra_ids) else: return (True, []) def empty_aid_tables(): ''' Will empty all tables needed for a re-run of the algorithm. Exceptions are aidAUTHORNAMES*, which have to be updated apriori and - aidPERSONID, which has to be updated from algorithm after the re-run. 
+ aidPERSONID, which has to be updated from algorithm after the re-run. ''' run_sql("TRUNCATE `aidDOCLIST`;" "TRUNCATE `aidREALAUTHORDATA`;" "TRUNCATE `aidREALAUTHORS`;" "TRUNCATE `aidVIRTUALAUTHORS`;" "TRUNCATE `aidVIRTUALAUTHORSCLUSTERS`;" "TRUNCATE `aidVIRTUALAUTHORSDATA`;") diff --git a/modules/bibauthorid/lib/bibauthorid_templates.py b/modules/bibauthorid/lib/bibauthorid_templates.py index b2800e005..e303bb80a 100644 --- a/modules/bibauthorid/lib/bibauthorid_templates.py +++ b/modules/bibauthorid/lib/bibauthorid_templates.py @@ -1,1591 +1,1591 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Bibauthorid HTML templates""" # pylint: disable=W0105 # pylint: disable=C0301 #from cgi import escape #from urllib import quote # import invenio.bibauthorid_config as bconfig from invenio.config import CFG_SITE_LANG from invenio.config import CFG_SITE_URL from invenio.config import CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL from invenio.bibformat import format_record from invenio.session import get_session -from invenio.search_engine import get_fieldvalues +from invenio.search_engine_utils import get_fieldvalues from invenio.bibauthorid_webapi import get_bibref_name_string, get_person_redirect_link, get_canonical_id_from_person_id from invenio.messages import gettext_set_language, wash_language #from invenio.textutils import encode_for_xml class Template: """Templating functions used by aid""" def __init__(self, language=CFG_SITE_LANG): """Set defaults for all aid template output""" self.language = language self._ = gettext_set_language(wash_language(language)) def tmpl_person_detail_layout(self, content): ''' writes HTML content into the person css container @param content: HTML content @type content: string @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h(content) h('
') return "\n".join(html) def tmpl_notification_box(self, message, teaser="Notice:", show_close_btn=True): ''' Creates a notification box based on the jQuery UI style @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s' % (teaser, message)) if show_close_btn: h(' X

') h('
') h('
') return "\n".join(html) def tmpl_error_box(self, message, teaser="Alert:", show_close_btn=True): ''' Creates an error box based on the jQuery UI style @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s' % (teaser, message)) if show_close_btn: h(' X

') h('
') h('
') return "\n".join(html) def tmpl_ticket_box(self, teaser, message, ticket, show_close_btn=True): ''' Creates a semi-permanent box informing about ticket status notifications @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param ticket: The ticket object from the session @param ticket: list of dict @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s ' % (teaser, message)) h('' + self._('Click here to review the transactions.') + '') h('
') if show_close_btn: h(' X

') h('
') h('
') return "\n".join(html) def tmpl_search_ticket_box(self, teaser, message, search_ticket, show_close_btn=False): ''' Creates a box informing about a claim in progress for the search. @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param search_ticket: The search ticket object from the session @param search_ticket: list of dict @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s ' % (teaser, message)) h("

") h('' + self._('Quit searching.') + '') # h('DBGticket - ' + str(search_ticket)) if show_close_btn: h(' X

') h('
') h('
') h('

 

') return "\n".join(html) def tmpl_meta_includes(self, kill_browser_cache=False): ''' Generates HTML code for the header section of the document META tags to kill browser caching Javascript includes CSS definitions @param kill_browser_cache: Do we want to kill the browser cache? @type kill_browser_cache: boolean ''' js_path = "%s/js" % CFG_SITE_URL imgcss_path = "%s/img" % CFG_SITE_URL result = [] # Add browser cache killer, hence some notifications are not displayed # out of the session. if kill_browser_cache: result = [ '', '', '', '', ''] scripts = ["jquery-1.4.4.js", "ui.core.js", "jquery.ui.widget.min.js", "jquery.ui.tabs.min.js", "jquery.form.js", "jquery.dataTables.min.js", "jquery.ui.mouse.min.js", "jquery.ui.draggable.min.js", "jquery.ui.position.min.js", "jquery.ui.resizable.min.js", "jquery.ui.button.min.js", "jquery.ui.dialog.min.js", "bibauthorid.js"] result.append('' % (imgcss_path)) result.append('' % (imgcss_path)) result.append('' % (imgcss_path)) for script in scripts: result.append('' % (js_path, script)) return "\n".join(result) def tmpl_author_confirmed(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }): ''' Generate play per-paper links for the table for the status "confirmed" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' return (' ' '%(alt_confirm)s' '%(confirm_text)s
' '' '%(alt_forget)s' '%(forget_text)s
' '' '%(alt_repeal)s' '%(repeal_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
' % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_forget':verbiage_dict['alt_forget'], 'forget_text':verbiage_dict['forget_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_author_repealed(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }): ''' Generate play per-paper links for the table for the status "repealed" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' return (' ' '%(alt_repeal)s' '%(repeal_text)s
' '' '%(alt_forget)s' '%(forget_text)s
' '' '%(alt_confirm)s' '%(confirm_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
' % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_forget':verbiage_dict['alt_forget'], 'forget_text':verbiage_dict['forget_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_author_undecided(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }): ''' Generate play per-paper links for the table for the status "no decision taken yet" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' #batchprocess?mconfirm=True&bibrefs=['100:17,16']&pid=1 return (' ' '' '%(alt_confirm)s' '%(confirm_text)s
' '' '%(alt_repeal)s' '%(repeal_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
' % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_open_claim(self, bibrefs, pid, last_viewed_pid, search_enabled=True): ''' Generate entry page for "claim or attribute this paper" @param bibrefs: list of unique ID constructs for the authors on this paper @type bibrefs: list of strings @param pid: the Person ID @type pid: int @param last_viewed_pid: last ID that had been subject to an action @type last_viewed_pid: int ''' t_html = [] h = t_html.append h(self._('You are about to attribute the following paper')) if len(bibrefs) > 1: h('s:
') else: h(':
') h("") pp_html = [] h = pp_html.append h(self.tmpl_notification_box("\n".join(t_html), self._("Info"), False)) h('

Your options:

') if pid > -1: h((' Claim for yourself
') % (CFG_SITE_URL, bibs, str(pid))) if last_viewed_pid: h((' Attribute to %s
') % (CFG_SITE_URL, bibs, str(last_viewed_pid[0]), last_viewed_pid[1])) if search_enabled: h(('' + self._(' Search for a person to attribute the paper to') + '
') % (CFG_SITE_URL, bibs)) return "\n".join(pp_html) def __tmpl_admin_records_table(self, form_id, person_id, bibrecids, verbiage_dict={'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, buttons_verbiage_dict={'mass_buttons':{'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, 'record_undecided':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.'}, 'record_confirmed':{'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment'}, 'record_repealed':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.'}}): ''' Generate the big tables for the person overview page @param form_id: name of the form @type form_id: string @param person_id: Person ID @type person_id: int @param bibrecids: List of records to display @type bibrecids: list @param verbiage_dict: language for the elements @type verbiage_dict: dict @param buttons_verbiage_dict: language for the buttons @type buttons_verbiage_dict: dict ''' no_papers_html = ['
'] no_papers_html.append('%s' % verbiage_dict['no_doc_string']) no_papers_html.append('
') if not bibrecids or not person_id: return "\n".join(no_papers_html) pp_html = [] h = pp_html.append h('
' % (form_id)) h('
') #+self._(' On all pages: ')) h('' + self._('Select All') + ' | ') h('' + self._('Select None') + ' | ') h('' + self._('Invert Selection') + ' | ') h('' + self._('Hide successful claims') + '') h('
') h('
') h(('∟') % (CFG_SITE_URL)) h('' % (person_id)) h('' % verbiage_dict['b_confirm']) h('' % verbiage_dict['b_repeal']) h('' % verbiage_dict['b_to_others']) h('' % verbiage_dict['b_forget']) h("
") h('') h("") h(" ") h(' ') h(' ') h(" ") h(" ") # h(" ") h(" ") h(" ") h("") h("") for idx, paper in enumerate(bibrecids): h(' ') h(' ' % (paper['bibref'])) rec_info = format_record(paper['recid'], "ha") rec_info = str(idx + 1) + '. ' + rec_info h(" " % (rec_info)) h(" " % (paper['authorname'].encode("utf-8"))) aff = "" if paper['authoraffiliation']: aff = paper['authoraffiliation'].encode("utf-8") else: aff = "unknown" h(" " % (aff)) # h(" " % (paper['paperdate'])) paper_status = self._("No status information found.") if paper['flag'] == 2: paper_status = self.tmpl_author_confirmed(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_confirmed']) elif paper['flag'] == -2: paper_status = self.tmpl_author_repealed(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_repealed']) else: paper_status = self.tmpl_author_undecided(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_undecided']) h(' ') h(" ") h(" ") h("
 Paper Short InfoAuthor NameAffiliationDateActions
%s%s%s%s
%s  
' % (paper['bibref'], paper['flag'], paper_status)) if 'rt_status' in paper and paper['rt_status']: h('' % (CFG_SITE_URL, self._("Operator review of user actions pending"))) h('
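# Aside (sketch of the claim-flag convention used in the branches above):
# flag == 2 renders as confirmed, flag == -2 as repealed, anything else as
# undecided; the tab assembly later likewise treats flag < -1 as rejected.
# The helper name is illustrative only:
def claim_status(flag):
    if flag == 2:
        return 'confirmed'
    elif flag == -2:
        return 'repealed'
    return 'undecided'

assert claim_status(2) == 'confirmed'
assert claim_status(-2) == 'repealed'
assert claim_status(0) == 'undecided'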
") h('
') #+self._(' On all pages: ')) h('' + self._('Select All') + ' | ') h('' + self._('Select None') + ' | ') h('' + self._('Invert Selection') + ' | ') h('' + self._('Hide successful claims') + '') h('
') h('
') h(('∟') % (CFG_SITE_URL)) h('' % (person_id)) h('' % verbiage_dict['b_confirm']) h('' % verbiage_dict['b_repeal']) h('' % verbiage_dict['b_to_others']) h('' % verbiage_dict['b_forget']) h("
") h("
") return "\n".join(pp_html) def __tmpl_reviews_table(self, person_id, bibrecids, admin=False): ''' Generate the table for potential reviews. @param form_id: name of the form @type form_id: string @param person_id: Person ID @type person_id: int @param bibrecids: List of records to display @type bibrecids: list @param admin: Show admin functions @type admin: boolean ''' no_papers_html = ['
'] no_papers_html.append(self._('Sorry, there are currently no records to be found in this category.')) no_papers_html.append('
') if not bibrecids or not person_id: return "\n".join(no_papers_html) pp_html = [] h = pp_html.append h('
') h('') h(" ") h(" ") h(' ') h(' ') h(" ") h(" ") h(" ") h(" ") for paper in bibrecids: h(' ') h(' ' % (paper)) rec_info = format_record(paper[0], "ha") if not admin: rec_info = rec_info.replace("person/search?q=", "author/") h(" " % (rec_info)) h(' ' % (paper)) h(" ") h(" ") h("
 Paper Short InfoActions
%s' + self._('Review Transaction') + '
") h('
' + self._(' On all pages: ')) h('' + self._('Select All') + ' | ') h('' + self._('Select None') + ' | ') h('' + self._('Invert Selection') + '') h('
') h('
') h('∟ With selected do: ') h('' % (person_id)) h('') h('') h("
") h('
') return "\n".join(pp_html) def tmpl_admin_person_info_box(self, ln, person_id= -1, names=[]): ''' Generate the box showing names @param ln: the language to use @type ln: string @param person_id: Person ID @type person_id: int @param names: List of names to display @type names: list ''' html = [] h = html.append if not ln: pass #class="ui-tabs ui-widget ui-widget-content ui-corner-all"> h('
' + self._('Name variants:') + '

') h("

") h('' % (person_id, person_id)) for name in names: # h(("%s "+self._('as appeared on')+" %s"+self._(' records')+"
") # % (name[0], name[1])) h(("%s (%s); ") % (name[0], name[1])) h("

") h("
") return "\n".join(html) def tmpl_admin_tabs(self, ln=CFG_SITE_LANG, person_id= -1, rejected_papers=[], rest_of_papers=[], review_needed=[], rt_tickets=[], open_rt_tickets=[], show_tabs=['records', 'repealed', 'review', 'comments', 'tickets', 'data'], ticket_links=['delete', 'commit', 'del_entry', 'commit_entry'], verbiage_dict={'confirmed':'Records', 'repealed':'Not this person\'s records', 'review':'Records in need of review', 'tickets':'Open Tickets', 'data':'Data', 'confirmed_ns':'Papers of this Person', 'repealed_ns':'Papers _not_ of this Person', 'review_ns':'Papers in need of review', 'tickets_ns':'Tickets for this Person', 'data_ns':'Additional Data for this Person'}, buttons_verbiage_dict={'mass_buttons':{'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, 'record_undecided':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.'}, 'record_confirmed':{'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment'}, 'record_repealed':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.'}}): ''' Generate the tabs for the person overview page @param ln: the language to use @type ln: string @param person_id: Person ID @type person_id: int @param rejected_papers: list of repealed papers @type rejected_papers: list @param rest_of_papers: list of attributed of undecided papers @type rest_of_papers: list @param review_needed: list of papers that need a review (choose name) @type review_needed:list @param rt_tickets: list of tickes for this Person @type rt_tickets: list @param open_rt_tickets: list of open request tickets @type open_rt_tickets: list @param show_tabs: list of tabs to display @type show_tabs: list of strings @param ticket_links: list of links to display @type ticket_links: list of strings @param verbiage_dict: language for the elements @type verbiage_dict: dict @param buttons_verbiage_dict: language for the buttons @type buttons_verbiage_dict: dict ''' html = [] h = html.append h('
') h(' ') if 'records' in show_tabs: h('
') r = verbiage_dict['confirmed_ns'] h('' % r) h(self.__tmpl_admin_records_table("massfunctions", person_id, rest_of_papers, verbiage_dict=buttons_verbiage_dict['mass_buttons'], buttons_verbiage_dict=buttons_verbiage_dict)) h("
") if 'repealed' in show_tabs: h('
') r = verbiage_dict['repealed_ns'] h('' % r) h(self._('These records have been marked as not being from this person.')) h('
' + self._('They will be taken into account in the next run of the author ') + self._('disambiguation algorithm and might disappear from this listing.')) h(self.__tmpl_admin_records_table("rmassfunctions", person_id, rejected_papers, verbiage_dict=buttons_verbiage_dict['mass_buttons'], buttons_verbiage_dict=buttons_verbiage_dict)) h("
") if 'review' in show_tabs: h('
') r = verbiage_dict['review_ns'] h('' % r) h(self.__tmpl_reviews_table(person_id, review_needed, True)) h('
') if 'tickets' in show_tabs: h('
') r = verbiage_dict['tickets'] h('' % r) r = verbiage_dict['tickets_ns'] h('

%s:

' % r) if rt_tickets: pass # open_rt_tickets = [a for a in open_rt_tickets if a[1] == rt_tickets] for t in open_rt_tickets: name = self._('Not provided') surname = self._('Not provided') uidip = self._('Not available') comments = self._('No comments') email = self._('Not provided') date = self._('Not Available') actions = [] for info in t[0]: if info[0] == 'firstname': name = info[1] elif info[0] == 'lastname': surname = info[1] elif info[0] == 'uid-ip': uidip = info[1] elif info[0] == 'comments': comments = info[1] elif info[0] == 'email': email = info[1] elif info[0] == 'date': date = info[1] elif info[0] in ['confirm', 'repeal']: actions.append(info) if 'delete' in ticket_links: h(('Ticket number: %(tnum)s ' + self._(' Delete this ticket') + ' ') % ({'tnum':t[1], 'url':CFG_SITE_URL, 'pid':str(person_id)})) if 'commit' in ticket_links: h((' or ' + self._(' Commit this entire ticket') + '
') % ({'tnum':t[1], 'url':CFG_SITE_URL, 'pid':str(person_id)})) h('
') h('Opened by: %s, %s
' % (surname, name)) h('Date: %s
' % date) h('identified by: %s
' % uidip) h('email: %s
' % email) h('comments: %s
' % comments) h('Suggested actions:
') h('
') for a in actions: bibref, bibrec = a[1].split(',') pname = get_bibref_name_string(bibref) title = "" try: title = get_fieldvalues(int(bibrec), "245__a")[0] except IndexError: title = "No title available" if 'commit_entry' in ticket_links: h('%(action)s - %(name)s on %(title)s ' % ({'action': a[0], 'url': CFG_SITE_URL, 'pid': str(person_id), 'bib':a[1], 'name': pname, 'title': title, 'rt': t[1]})) else: h('%(action)s - %(name)s on %(title)s' % ({'action': a[0], 'name': pname, 'title': title})) if 'del_entry' in ticket_links: h(' - Delete this entry ' % ({'action': a[0], 'url': CFG_SITE_URL, 'pid': str(person_id), 'bib': a[1], 'rt': t[1]})) h(' - View record
' % ({'url':CFG_SITE_URL, 'record':str(bibrec)})) h('
') h('
') # h(str(open_rt_tickets)) h("
") if 'data' in show_tabs: h('
') r = verbiage_dict['data_ns'] h('' % r) canonical_name = get_canonical_id_from_person_id(person_id) h('
Canonical name setup ') h('
Current canonical name: %s
' % (canonical_name, CFG_SITE_URL)) h('') h(' ' % canonical_name) h('' % person_id) h('') h('
') h(' ' + self._('... This tab is currently under construction ... ') + '

') h("
") h("
") return "\n".join(html) def tmpl_bibref_check(self, bibrefs_auto_assigned, bibrefs_to_confirm): ''' Generate overview to let user chose the name on the paper that resembles the person in question. @param bibrefs_auto_assigned: list of auto-assigned papers @type bibrefs_auto_assigned: list @param bibrefs_to_confirm: list of unclear papers and names @type bibrefs_to_confirm: list ''' html = [] h = html.append h('
') h('

' + self._('We could not reliably determine the name of the author on the records below to automatically perform an assignment.') + '

') h('

' + self._('Please select an author for the records in question.') + '
') h(self._('Boxes not selected will be ignored in the process.')) h('

') for person in bibrefs_to_confirm: if not "bibrecs" in bibrefs_to_confirm[person]: continue h((self._("Select name for") + " %s") % bibrefs_to_confirm[person]["person_name"]) pid = person for recid in bibrefs_to_confirm[person]["bibrecs"]: h('
') try: fv = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): fv = self._('Error retrieving record title') h(self._("Paper title: ") + fv) h('') h("
") if bibrefs_auto_assigned: h(self._('The following names have been automatically chosen:')) for person in bibrefs_auto_assigned: if not "bibrecs" in bibrefs_auto_assigned[person]: continue h((self._("For") + " %s:") % bibrefs_auto_assigned[person]["person_name"]) pid = person for recid in bibrefs_auto_assigned[person]["bibrecs"]: try: fv = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): fv = self._('Error retrieving record title') h('
') h(('%s' + self._('with name: ')) % (fv)) #, bibrefs_auto_assigned[person]["bibrecs"][recid][0][1])) # asbibref = "%s||%s" % (person, bibrefs_auto_assigned[person]["bibrecs"][recid][0][0]) pbibref = bibrefs_auto_assigned[person]["bibrecs"][recid][0][0] h('') # h('' # % (recid, asbibref)) h('
') h('
') h(' ') h(' ') h("
") h('
') return "\n".join(html) def tmpl_invenio_search_box(self): ''' Generate little search box for missing papers. Links to main invenio search on start papge. ''' html = [] h = html.append h('
Search for missing papers:
' % CFG_SITE_URL) h(' ') h('') h('
') return "\n".join(html) def tmpl_person_menu(self): ''' Generate the menu bar ''' html = [] h = html.append h('
') return "\n".join(html) def tmpl_person_menu_admin(self): ''' Generate the menu bar ''' html = [] h = html.append h('
') h(' ') h('
') return "\n".join(html) def tmpl_ticket_final_review(self, req, mark_yours=[], mark_not_yours=[], mark_theirs=[], mark_not_theirs=[]): ''' Generate final review page. Displaying transactions if they need confirmation. @param req: Apache request object @type req: Apache request object @param mark_yours: papers marked as 'yours' @type mark_yours: list @param mark_not_yours: papers marked as 'not yours' @type mark_not_yours: list @param mark_theirs: papers marked as being someone else's @type mark_theirs: list @param mark_not_theirs: papers marked as NOT being someone else's @type mark_not_theirs: list ''' def html_icon_legend(): html = [] h = html.append h('
') h("

") h(self._("Symbols legend: ")) h("

") h('') h('' % (CFG_SITE_URL, self._("Everything is shiny, captain!"))) h(self._('The result of this request will be visible immediately')) h('
') h('') h('' % (CFG_SITE_URL, self._("Confirmation needed to continue"))) h(self._('The result of this request will be visible immediately but we need your confirmation to do so for this paper have been manually claimed before')) h('
') h('') h('' % (CFG_SITE_URL, self._("This will create a change request for the operators"))) h(self._("The result of this request will be visible upon confirmation through an operator")) h("") h("
") return "\n".join(html) def mk_ticket_row(ticket): recid = -1 rectitle = "" recauthor = "No Name Found." personname = "No Name Found." try: recid = ticket['bibref'].split(",")[1] except (ValueError, KeyError, IndexError): return "" try: rectitle = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): rectitle = self._('Error retrieving record title') if "authorname_rec" in ticket: recauthor = ticket['authorname_rec'] if "person_name" in ticket: personname = ticket['person_name'] html = [] h = html.append # h("Debug: " + str(ticket) + "
") h(' ') h('') h(rectitle) h('') h('') h((personname + " (" + self._("Selected name on paper") + ": %s)") % recauthor) h('') h('') if ticket['status'] == "granted": h('' % (CFG_SITE_URL, self._("Everything is shiny, captain!"))) elif ticket['status'] == "warning_granted": h('' % (CFG_SITE_URL, self._("Verification needed to continue"))) else: h('' % (CFG_SITE_URL, self._("This will create a request for the operators"))) h('') h('') h('' 'Cancel' '' % (CFG_SITE_URL, ticket['bibref'])) h('') return "\n".join(html) session = get_session(req) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] html = [] h = html.append # h(html_icon_legend()) if "checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"]: h(self.tmpl_error_box(self._("Please Check your entries"), self._("Sorry."))) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "tickets" in pinfo["checkout_faulty_fields"]): h(self.tmpl_error_box(self._("Please provide at least one transaction."), self._("Error:"))) # h('
' + # self._('Almost done! Please use the button "Confirm these changes" ' # 'at the end of the page to send this request to an operator ' # 'for review!') + '
') h('
') h("

" + self._('Please provide your information') + "

") h('
' % (CFG_SITE_URL)) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_first_name" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your first name') + "

") h("

") if "user_first_name_sys" in pinfo and pinfo["user_first_name_sys"]: h((self._("Your first name:") + " %s") % pinfo["user_first_name"]) else: h(self._('Your first name:') + ' ' % pinfo["user_first_name"]) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_last_name" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your last name') + "

") h("

") if "user_last_name_sys" in pinfo and pinfo["user_last_name_sys"]: h((self._("Your last name:") + " %s") % pinfo["user_last_name"]) else: h(self._('Your last name:') + ' ' % pinfo["user_last_name"]) h("

") if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_email" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your eMail address') + "

") if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_email_taken" in pinfo["checkout_faulty_fields"]): h("

" + self._('This eMail address is reserved by a user. Please log in or provide an alternative eMail address') + "

") h("

") if "user_email_sys" in pinfo and pinfo["user_email_sys"]: h((self._("Your eMail:") + " %s") % pinfo["user_email"]) else: h((self._('Your eMail:') + ' ') % pinfo["user_email"]) h("

") h(self._("You may leave a comment (optional)") + ":
") h('") h("

") h("

 

") h('
') h((' ') % self._("Continue claiming*")) h((' ') % self._("Confirm these changes**")) h('') h((' ') % self._("!Delete the entire request!")) h('') h('
') h("
") h('
') h('
') h('') if not ulevel == "guest": h('') h("") h('') if mark_yours: for idx, ticket in enumerate(mark_yours): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_not_yours: for idx, ticket in enumerate(mark_not_yours): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_theirs: for idx, ticket in enumerate(mark_theirs): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_not_theirs: for idx, ticket in enumerate(mark_not_theirs): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('

" + self._('Mark as your documents') + "

 Nothing staged as yours

" + self._("Mark as _not_ your documents") + "

 ' + self._('Nothing staged as not yours') + '

" + self._('Mark as their documents') + "

 ' + self._('Nothing staged in this category') + '

" + self._('Mark as _not_ their documents') + "

 ' + self._('Nothing staged in this category') + '
') h("
") h("

") h(self._(" * You can come back to this page later. Nothing will be lost.
")) h(self._(" ** Performs all requested changes. Changes subject to permission restrictions " "will be submitted to an operator for manual review.")) h("

") h(html_icon_legend()) return "\n".join(html) def tmpl_author_search(self, query, results, search_ticket=None, author_pages_mode=True, fallback_mode=False, fallback_title='', fallback_message='', new_person_link=False): ''' Generates the search for Person entities. @param query: the query a user issued to the search @type query: string @param results: list of results @type results: list @param search_ticket: search ticket object to inform about pending claiming procedure @type search_ticket: dict ''' linktarget = "person" if author_pages_mode: linktarget = "author" if not query: query = "" html = [] h = html.append h('
') h('' % query) h('') h('
') if fallback_mode: if fallback_title: h('' % fallback_title) if fallback_message: h('%s' % fallback_message) if not results and not query: h('') return "\n".join(html) h("

 

") if query and not results: authemail = CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL h(('' + self._("We do not have a publication list for '%s'." + " Try using a less specific author name, or check" + " back in a few days as attributions are updated " + "frequently. Or you can send us feedback, at ") + "%s.") % (query, authemail, authemail)) h('') if new_person_link: if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) else: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') h('
') h('' % (link)) h(self._("Create a new Person for your search")) h('') h('
') return "\n".join(html) # base_color = 100 # row_color = 0 for index, result in enumerate(results): # if len(results) > base_color: # row_color += 1 # else: # row_color = base_color - (base_color - index * # (base_color / len(results))) pid = result[0] names = result[1] papers = result[2] h('
' % (index % 2)) h('
') # h('%s. ' # % (row_color, row_color, row_color, index + 1)) h('%s. ' % (index + 1)) # for nindex, name in enumerate(names): # color = row_color + nindex * 35 # color = min(color, base_color) # h('%s; ' # % (color, color, color, name[0])) for name in names: h('%s ' % (name[0])) h('
') h('') if index < bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h(('' ' ' + self._('Recent Papers') + '') % (pid)) if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, pid) for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) h(('' '' '' + self._('YES!') + '' + self._(' Attribute Papers To ') + '%s (PersonID: %d )') % (link, get_person_redirect_link(pid), pid)) else: h(('' '' + self._('Publication List ') + '(%s)') % (CFG_SITE_URL, linktarget, get_person_redirect_link(pid), get_person_redirect_link(pid))) h('
' % (pid)) if papers and index < bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h((self._('Showing the') + ' %d ' + self._('most recent documents:')) % len(papers)) h("
    ") for paper in papers: h("
  • %s
  • " % (format_record(paper[0], "ha"))) h("
") elif not papers: h("

" + self._('Sorry, there are no documents known for this person') + "

") elif index >= bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h("

" + self._('Information not shown to increase performances. Please refine your search.') + "

") h(('' '' + self._('Publication List ') + '(%s) (in a new window or tab)') % (CFG_SITE_URL, linktarget, get_person_redirect_link(pid), get_person_redirect_link(pid))) h('
') h('
') if new_person_link: if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) else: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') h('
') h('' % (link)) h(self._("Create a new Person for your search")) h('') h('
') return "\n".join(html) def tmpl_welcome_start(self): ''' Shadows the behaviour of tmpl_search_pagestart ''' return '
' def tmpl_welcome_arxiv(self): ''' SSO landing/welcome page. ''' html = [] h = html.append h('

Congratulations! You have now successfully registered in INSPIRE via arXiv!

') h('

In the coming months, your INSPIRE account will give you the ability ' 'to use personalized features of INSPIRE and other powerful tools.

') h('

Right now, you can use your INSPIRE account to correct your ' 'publication record and help us to produce better publication lists ' 'and citation statistics.

') h('

We are importing your publication list from arXiv right now, and ' 'will use this information to find other papers you\'ve written.' ' This may take a few seconds, or even a few minutes if you\'ve' ' been very busy. You might like to grab a cup of coffee and come' ' back, or you can always log in later, and your account will be ' 'prepopulated. When we\'re done, you\'ll see a link to correct your ' 'publications below.

') h('

When the link appears, we invite you to confirm the papers that are ' 'yours and to reject the ones that you did not author. If you have ' 'any questions or encounter any problems, please contact us here: ' '%s

' % (CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL, CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL)) return "\n".join(html) def tmpl_welcome(self): ''' SSO landing/welcome page. ''' html = [] h = html.append h('

Congratulations! You have successfully logged in!

') h('

We are currently creating your publication list. When we\'re done, you\'ll see a link to correct your ' 'publications below.

') h('

When the link appears, we invite you to confirm the papers that are ' 'yours and to reject the ones that you did not author. If you have ' 'any questions or encounter any problems, please contact us here: ' '%s

' % (CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL, CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL)) return "\n".join(html) def tmpl_welcome_link(self): ''' Creates the link for the actual user action. ''' return '' + \ self._('Correct my publication lists!') + \ '' def tmpl_welcome_end(self): ''' Shadows the behaviour of tmpl_search_pageend ''' return '
' def tmpl_tickets_admin(self, tickets=[]): ''' Open tickets short overview for operators. ''' html = [] h = html.append if len(tickets) > 0: h('List of open tickets:

') for t in tickets: h(' %(longname)s - (%(cname)s - PersonID: %(pid)s): %(num)s open tickets.
' % ({'cname':str(t[1]), 'longname':str(t[0]), 'pid':str(t[2]), 'num':str(t[3])})) else: h('There are currently no open tickets.') return "\n".join(html) # pylint: enable=C0301 diff --git a/modules/bibauthorid/lib/bibauthorid_webinterface.py b/modules/bibauthorid/lib/bibauthorid_webinterface.py index 6bede7472..5e755a091 100644 --- a/modules/bibauthorid/lib/bibauthorid_webinterface.py +++ b/modules/bibauthorid/lib/bibauthorid_webinterface.py @@ -1,2477 +1,2478 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Bibauthorid Web Interface Logic and URL handler.""" # pylint: disable=W0105 # pylint: disable=C0301 # pylint: disable=W0613 from cgi import escape from copy import deepcopy import sys from invenio.bibauthorid_config import CLAIMPAPER_ADMIN_ROLE from invenio.bibauthorid_config import CLAIMPAPER_USER_ROLE #from invenio.bibauthorid_config import EXTERNAL_CLAIMED_RECORDS_KEY from invenio.config import CFG_SITE_LANG from invenio.config import CFG_SITE_URL from invenio.config import CFG_SITE_NAME from invenio.config import CFG_INSPIRE_SITE #from invenio.config import CFG_SITE_SECURE_URL from invenio.webpage import page, pageheaderonly, pagefooteronly from invenio.messages import gettext_set_language, wash_language from invenio.template import load from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.session import get_session from invenio.urlutils import redirect_to_url from invenio.webuser import getUid, page_not_authorized, collect_user_info, set_user_preferences from invenio.webuser import email_valid_p, emailUnique from invenio.webuser import get_email_from_username, get_uid_from_email, isUserSuperAdmin from invenio.access_control_admin import acc_find_user_role_actions from invenio.access_control_admin import acc_get_user_roles, acc_get_role_id -from invenio.search_engine import perform_request_search, get_fieldvalues, sort_records +from invenio.search_engine import perform_request_search, sort_records +from invenio.search_engine_utils import get_fieldvalues import invenio.bibauthorid_webapi as webapi import invenio.bibauthorid_config as bconfig from pprint import pformat JSON_OK = False if sys.hexversion < 0x2060000: try: import simplejson as json JSON_OK = True except ImportError: # Okay, no Ajax app will be possible, but continue anyway, # since this package is only recommended, not mandatory. 
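# Aside (condensed sketch of the version guard here, not part of the module):
# prefer the stdlib json on Python >= 2.6, fall back to simplejson, and
# record availability so the Ajax handlers can degrade gracefully. Names are
# illustrative:
try:
    import json as _json
    _JSON_AVAILABLE = True
except ImportError:
    try:
        import simplejson as _json
        _JSON_AVAILABLE = True
    except ImportError:
        _json = None
        _JSON_AVAILABLE = False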
JSON_OK = False else: try: import json JSON_OK = True except ImportError: JSON_OK = False TEMPLATE = load('bibauthorid') class WebInterfaceBibAuthorIDPages(WebInterfaceDirectory): """ Handle /person pages and AJAX requests Supplies the methods /person/ /person/action /person/welcome /person/search /person/you -> /person/ /person/export """ _exports = ['', 'action', 'welcome', 'search', 'you', 'export', 'tickets_admin'] def __init__(self, person_id=None): """ Constructor of the web interface. @param person_id: The identifier of a user. Can be one of: - a bibref: e.g. "100:1442,155" - a person id: e.g. "14" - a canonical id: e.g. "Ellis_J_1" @type person_id: string @return: will return an empty object if the identifier is of wrong type @rtype: None (if something is not right) """ pid = -1 is_bibref = False is_canonical_id = False self.adf = self.__init_call_dispatcher() if (not isinstance(person_id, str)) or (not person_id): self.person_id = pid return None if person_id.count(":") and person_id.count(","): is_bibref = True elif webapi.is_valid_canonical_id(person_id): is_canonical_id = True if is_bibref and pid > -2: bibref = person_id table, ref, bibrec = None, None, None if not bibref.count(":"): pid = -2 if not bibref.count(","): pid = -2 try: table = bibref.split(":")[0] ref = bibref.split(":")[1].split(",")[0] bibrec = bibref.split(":")[1].split(",")[1] except IndexError: pid = -2 try: table = int(table) ref = int(ref) bibrec = int(bibrec) except (ValueError, TypeError): pid = -2 if pid == -1: try: pid = int(webapi.get_person_id_from_paper(person_id)) except (ValueError, TypeError): pid = -1 else: pid = -1 elif is_canonical_id: try: pid = int(webapi.get_person_id_from_canonical_id(person_id)) except (ValueError, TypeError): pid = -1 else: try: pid = int(person_id) except ValueError: pid = -1 self.person_id = pid def __call__(self, req, form): ''' Serve the main person page. Will use the object's person id to get a person's information. 
@param req: Apache Request Object @type req: Apache Request Object @param form: Parameters sent via GET or POST request @type form: dict @return: a full page formatted in HTML @rtype: string ''' self._session_bareinit(req) argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0), 'ticketid': (int, -1), 'open_claim': (str, None)}) ln = wash_language(argd['ln']) rt_ticket_id = argd['ticketid'] req.argd = argd #needed for perform_req_search session = get_session(req) ulevel = self.__get_user_role(req) uid = getUid(req) if isUserSuperAdmin({'uid': uid}): ulevel = 'admin' no_access = self._page_access_permission_wall(req, [self.person_id]) if no_access: return no_access try: pinfo = session["personinfo"] except KeyError: pinfo = dict() session['personinfo'] = pinfo if 'open_claim' in argd and argd['open_claim']: pinfo['claim_in_process'] = True elif "claim_in_process" in pinfo and pinfo["claim_in_process"]: pinfo['claim_in_process'] = True else: pinfo['claim_in_process'] = False uinfo = collect_user_info(req) uinfo['precached_viewclaimlink'] = pinfo['claim_in_process'] set_user_preferences(uid, uinfo) pinfo['ulevel'] = ulevel if self.person_id != -1: pinfo["claimpaper_admin_last_viewed_pid"] = self.person_id pinfo["ln"] = ln if not "ticket" in pinfo: pinfo["ticket"] = [] if rt_ticket_id: pinfo["admin_requested_ticket_id"] = rt_ticket_id session.save() content = '' for part in ['optional_menu', 'ticket_box', 'personid_info', 'tabs', 'footer']: content += self.adf[part][ulevel](req, form, ln) title = self.adf['title'][ulevel](req, form, ln) body = TEMPLATE.tmpl_person_detail_layout(content) metaheaderadd = self._scripts() self._clean_ticket(req) return page(title=title, metaheaderadd=metaheaderadd, body=body, req=req, language=ln) def _page_access_permission_wall(self, req, req_pid=None, req_level=None): ''' Display an error page if the user is not authorized to use the interface.
@param req: Apache Request Object for session management @type req: Apache Request Object @param req_pid: Requested person id @type req_pid: int @param req_level: Request level required for the page @type req_level: string ''' session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] uinfo = collect_user_info(req) if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) is_authorized = True pids_to_check = [] if not bconfig.AID_ENABLED: return page_not_authorized(req, text=_("Fatal: Author ID capabilities are disabled on this system.")) if req_level and 'ulevel' in pinfo and pinfo["ulevel"] != req_level: return page_not_authorized(req, text=_("Fatal: You are not allowed to access this functionality.")) if req_pid and not isinstance(req_pid, list): pids_to_check = [req_pid] elif req_pid and isinstance(req_pid, list): pids_to_check = req_pid if (not (uinfo['precached_usepaperclaim'] or uinfo['precached_usepaperattribution']) and 'ulevel' in pinfo and not pinfo["ulevel"] == "admin"): is_authorized = False if is_authorized and not webapi.user_can_view_CMP(uid): is_authorized = False if is_authorized and 'ticket' in pinfo: for tic in pinfo["ticket"]: if 'pid' in tic: pids_to_check.append(tic['pid']) if pids_to_check and is_authorized: user_pid = webapi.get_pid_from_uid(uid) if not uinfo['precached_usepaperattribution']: if user_pid[1]: user_pid = user_pid[0][0] else: user_pid = -1 if (not user_pid in pids_to_check and 'ulevel' in pinfo and not pinfo["ulevel"] == "admin"): is_authorized = False elif (user_pid in pids_to_check and 'ulevel' in pinfo and not pinfo["ulevel"] == "admin"): for tic in list(pinfo["ticket"]): if not tic["pid"] == user_pid: pinfo['ticket'].remove(tic) if not is_authorized: return page_not_authorized(req, text=_("Fatal: You are not allowed to access this functionality.")) else: return "" def _session_bareinit(self, req): ''' Initializes session personinfo entry if none exists @param req: Apache Request Object @type req: Apache Request Object ''' session = get_session(req) uid = getUid(req) ulevel = self.__get_user_role(req) if isUserSuperAdmin({'uid': uid}): ulevel = 'admin' try: pinfo = session["personinfo"] pinfo['ulevel'] = ulevel if "claimpaper_admin_last_viewed_pid" not in pinfo: pinfo["claimpaper_admin_last_viewed_pid"] = -2 if 'ln' not in pinfo: pinfo["ln"] = 'en' if 'ticket' not in pinfo: pinfo["ticket"] = [] session.save() except KeyError: pinfo = dict() session['personinfo'] = pinfo pinfo['ulevel'] = ulevel pinfo["claimpaper_admin_last_viewed_pid"] = -2 pinfo["ln"] = 'en' pinfo["ticket"] = [] session.save() def _lookup(self, component, path): """ This handler parses dynamic URLs: - /person/1332 shows the page of person 1332 - /person/100:5522,1431 shows the page of the person identified by the table:bibref,bibrec pair """ if not component in self._exports: return WebInterfaceBibAuthorIDPages(component), path def __init_call_dispatcher(self): ''' Initialization of call dispacher dictionary @return: call dispatcher dictionary @rtype: dict ''' #author_detail_functions adf = dict() adf['title'] = dict() adf['optional_menu'] = dict() adf['ticket_box'] = dict() adf['tabs'] = dict() adf['footer'] = dict() adf['personid_info'] = dict() adf['ticket_dispatch'] = dict() adf['ticket_commit'] = dict() adf['title']['guest'] = self._generate_title_guest adf['title']['user'] = self._generate_title_user adf['title']['admin'] = self._generate_title_admin adf['optional_menu']['guest'] = self._generate_optional_menu_guest 
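# Aside (stand-alone sketch of the dispatch table being assembled here): adf
# maps a page part and a user level ('guest'/'user'/'admin') to the bound
# method that renders it, so __call__ can assemble a page with one loop over
# parts. The demo class and names are illustrative:
class _AdfDemo(object):
    def _title_guest(self):
        return 'guest title'
    def _title_admin(self):
        return 'admin title'
    def render(self, ulevel):
        adf = {'title': {'guest': self._title_guest,
                         'admin': self._title_admin}}
        return adf['title'][ulevel]()

assert _AdfDemo().render('admin') == 'admin title'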
adf['optional_menu']['user'] = self._generate_optional_menu_user adf['optional_menu']['admin'] = self._generate_optional_menu_admin adf['ticket_box']['guest'] = self._generate_ticket_box_guest adf['ticket_box']['user'] = self._generate_ticket_box_user adf['ticket_box']['admin'] = self._generate_ticket_box_admin adf['personid_info']['guest'] = self._generate_person_info_box_guest adf['personid_info']['user'] = self._generate_person_info_box_user adf['personid_info']['admin'] = self._generate_person_info_box_admin adf['tabs']['guest'] = self._generate_tabs_guest adf['tabs']['user'] = self._generate_tabs_user adf['tabs']['admin'] = self._generate_tabs_admin adf['footer']['guest'] = self._generate_footer_guest adf['footer']['user'] = self._generate_footer_user adf['footer']['admin'] = self._generate_footer_admin adf['ticket_dispatch']['guest'] = self._ticket_dispatch_user adf['ticket_dispatch']['user'] = self._ticket_dispatch_user adf['ticket_dispatch']['admin'] = self._ticket_dispatch_admin adf['ticket_commit']['guest'] = self._ticket_commit_guest adf['ticket_commit']['user'] = self._ticket_commit_user adf['ticket_commit']['admin'] = self._ticket_commit_admin return adf def _generate_title_guest(self, req, form, ln): ''' Generate the title for a guest user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' if self.person_id: return 'Attribute papers for: ' + str(webapi.get_person_redirect_link(self.person_id)) else: return 'Attribute papers' def _generate_title_user(self, req, form, ln): ''' Generate the title for a regular user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' if self.person_id: return 'Attribute papers (user interface) for: ' + str(webapi.get_person_redirect_link(self.person_id)) else: return 'Attribute papers' def _generate_title_admin(self, req, form, ln): ''' Generate the title for an admin user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' if self.person_id: return 'Attribute papers (administrator interface) for: ' + str(webapi.get_person_redirect_link(self.person_id)) else: return 'Attribute papers' def _generate_optional_menu_guest(self, req, form, ln): ''' Generate the menu for a guest user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0)}) menu = TEMPLATE.tmpl_person_menu() if "verbose" in argd and argd["verbose"] > 0: session = get_session(req) pinfo = session['personinfo'] menu += "\n
" + pformat(pinfo) + "
\n" return menu def _generate_optional_menu_user(self, req, form, ln): ''' Generate the menu for a regular user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0)}) menu = TEMPLATE.tmpl_person_menu() if "verbose" in argd and argd["verbose"] > 0: session = get_session(req) pinfo = session['personinfo'] menu += "\n
" + pformat(pinfo) + "
\n" return menu def _generate_optional_menu_admin(self, req, form, ln): ''' Generate the title for an admin user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0)}) menu = TEMPLATE.tmpl_person_menu_admin() if "verbose" in argd and argd["verbose"] > 0: session = get_session(req) pinfo = session['personinfo'] menu += "\n
" + pformat(pinfo) + "
\n" return menu def _generate_ticket_box_guest(self, req, form, ln): ''' Generate the semi-permanent info box for a guest user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' session = get_session(req) pinfo = session['personinfo'] ticket = pinfo['ticket'] pendingt = [] donet = [] for t in ticket: if 'execution_result' in t: if t['execution_result'] == True: donet.append(t) else: pendingt.append(t) if len(pendingt) == 1: message = 'There is ' + str(len(pendingt)) + ' transaction in progress.' else: message = 'There are ' + str(len(pendingt)) + ' transactions in progress.' teaser = 'Claim in process!' if len(pendingt) == 0: box = "" else: box = TEMPLATE.tmpl_ticket_box(teaser, message, "") if len(donet) > 0: teaser = 'Success!' if len(donet) == 1: message = str(len(donet)) + ' transaction succesfully executed.' else: message = str(len(donet)) + ' transactions succesfully executed.' box = box + TEMPLATE.tmpl_notification_box(message, teaser) return box def _generate_ticket_box_user(self, req, form, ln): ''' Generate the semi-permanent info box for a regular user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' return self._generate_ticket_box_guest(req, form, ln) def _generate_ticket_box_admin(self, req, form, ln): ''' Generate the semi-permanent info box for an admin user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' return self._generate_ticket_box_guest(req, form, ln) def _generate_person_info_box_guest(self, req, form, ln): ''' Generate the name info box for a guest user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' return self._generate_person_info_box_admin(req, form, ln) def _generate_person_info_box_user(self, req, form, ln): ''' Generate the name info box for a regular user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' return self._generate_person_info_box_admin(req, form, ln) def _generate_person_info_box_admin(self, req, form, ln): ''' Generate the name info box for an admin user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' names = webapi.get_person_names_from_id(self.person_id) box = TEMPLATE.tmpl_admin_person_info_box(ln, person_id=self.person_id, names=names) return box def _generate_tabs_guest(self, req, form, ln): ''' Generate the tabs content for a guest user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' session = get_session(req) # uid = getUid(req) pinfo = session["personinfo"] if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) links = [] # ['delete', 'commit','del_entry','commit_entry'] tabs = ['records', 'repealed', 'review'] 
verbiage_dict = {'confirmed': 'Papers', 'repealed': _('Papers removed from this profile'), 'review': _('Papers in need of review'), 'tickets': _('Open Tickets'), 'data': _('Data'), 'confirmed_ns': _('Papers of this Person'), 'repealed_ns': _('Papers _not_ of this Person'), 'review_ns': _('Papers in need of review'), 'tickets_ns': _('Tickets for this Person'), 'data_ns': _('Additional Data for this Person')} buttons_verbiage_dict = {'mass_buttons': {'no_doc_string': _('Sorry, there are currently no documents to be found in this category.'), 'b_confirm': _('Yes, those papers are by this person.'), 'b_repeal': _('No, those papers are not by this person'), 'b_to_others': _('Assign to other person'), 'b_forget': _('Forget decision')}, 'record_undecided': {'alt_confirm': _('Confirm!'), 'confirm_text': _('Yes, this paper is by this person.'), 'alt_repeal': _('Rejected!'), 'repeal_text': _('No, this paper is not by this person'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_confirmed': {'alt_confirm': _('Confirmed.'), 'confirm_text': _('Marked as this person\'s paper'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repeal!'), 'repeal_text': _('But it\'s not this person\'s paper.'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_repealed': {'alt_confirm': _('Confirm!'), 'confirm_text': _('But it is this person\'s paper.'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repealed'), 'repeal_text': _('Marked as not this person\'s paper'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}} return self._generate_tabs_admin(req, form, ln, show_tabs=tabs, ticket_links=links, open_tickets=[], verbiage_dict=verbiage_dict, buttons_verbiage_dict=buttons_verbiage_dict) def _generate_tabs_user(self, req, form, ln): ''' Generate the tabs content for a regular user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string ''' session = get_session(req) uid = getUid(req) pinfo = session['personinfo'] if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) links = ['delete', 'del_entry'] tabs = ['records', 'repealed', 'review', 'tickets'] if pinfo["claimpaper_admin_last_viewed_pid"] == webapi.get_pid_from_uid(uid)[0][0]: verbiage_dict = {'confirmed': _('Your papers'), 'repealed': _('Not your papers'), 'review': _('Papers in need of review'), 'tickets': _('Your tickets'), 'data': _('Data'), 'confirmed_ns': _('Your papers'), 'repealed_ns': _('Not your papers'), 'review_ns': _('Papers in need of review'), 'tickets_ns': _('Your tickets'), 'data_ns': _('Additional Data for this Person')} buttons_verbiage_dict = {'mass_buttons': {'no_doc_string': _('Sorry, there are currently no documents to be found in this category.'), 'b_confirm': _('These are mine!'), 'b_repeal': _('These are not mine!'), 'b_to_others': _('It\'s not mine, but I know whose it is!'), 'b_forget': _('Forget decision')}, 'record_undecided': {'alt_confirm': _('Mine!'), 'confirm_text': _('This is my record!'), 'alt_repeal': _('Not mine!'), 'repeal_text': _('This is not my paper!'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_confirmed': {'alt_confirm': _('Not Mine.'), 'confirm_text': _('Marked as my record!'), 
'alt_forget': _('Forget decision!'), 'forget_text': _('Forget assignment decision'), 'alt_repeal': _('Not Mine!'), 'repeal_text': _('But this is mine!'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_repealed': {'alt_confirm': _('Mine!'), 'confirm_text': _('But this is my record!'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision!'), 'alt_repeal': _('Not Mine!'), 'repeal_text': _('Marked as not your record.'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}} else: verbiage_dict = {'confirmed': _('Papers'), 'repealed': _('Papers removed from this profile'), 'review': _('Papers in need of review'), 'tickets': _('Your tickets'), 'data': _('Data'), 'confirmed_ns': _('Papers of this Person'), 'repealed_ns': _('Papers _not_ of this Person'), 'review_ns': _('Papers in need of review'), 'tickets_ns': _('Tickes you created about this person'), 'data_ns': _('Additional Data for this Person')} buttons_verbiage_dict = {'mass_buttons': {'no_doc_string': _('Sorry, there are currently no documents to be found in this category.'), 'b_confirm': _('Yes, those papers are by this person.'), 'b_repeal': _('No, those papers are not< by this person'), 'b_to_others': _('Assign to other person'), 'b_forget': _('Forget decision')}, 'record_undecided': {'alt_confirm': _('Confirm!'), 'confirm_text': _('Yes, this paper is by this person.'), 'alt_repeal': _('Rejected!'), 'repeal_text': _('No, this paper is not by this person'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_confirmed': {'alt_confirm': _('Confirmed.'), 'confirm_text': _('Marked as this person\'s paper'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repeal!'), 'repeal_text': _('But it\'s not this person\'s paper.'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_repealed': {'alt_confirm': _('Confirm!'), 'confirm_text': _('But it is this person\'s paper.'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repealed'), 'repeal_text': _('Marked as not this person\'s paper'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}} session = get_session(req) uid = getUid(req) open_tickets = webapi.get_person_request_ticket(self.person_id) tickets = [] for t in open_tickets: owns = False for row in t[0]: if row[0] == 'uid-ip' and row[1].split('||')[0] == str(uid): owns = True if owns: tickets.append(t) return self._generate_tabs_admin(req, form, ln, show_tabs=tabs, ticket_links=links, open_tickets=tickets, verbiage_dict=verbiage_dict, buttons_verbiage_dict=buttons_verbiage_dict) def _generate_tabs_admin(self, req, form, ln, show_tabs=['records', 'repealed', 'review', 'comments', 'tickets', 'data'], open_tickets=None, ticket_links=['delete', 'commit', 'del_entry', 'commit_entry'], verbiage_dict=None, buttons_verbiage_dict=None): ''' Generate the tabs content for an admin user @param req: Apache Request Object @type req: Apache Request Object @param form: POST/GET variables of the request @type form: dict @param ln: language to show this page in @type ln: string @param show_tabs: list of tabs to display @type show_tabs: list of strings @param ticket_links: list of links to display @type ticket_links: list of strings @param verbiage_dict: language for the elements @type verbiage_dict: dict @param buttons_verbiage_dict: language for the 
buttons @type buttons_verbiage_dict: dict ''' session = get_session(req) personinfo = {} records = [] try: personinfo = session["personinfo"] except KeyError: return "" if 'ln' in personinfo: ln = personinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) if not verbiage_dict: verbiage_dict = self._get_default_verbiage_dicts_for_admin(req) if not buttons_verbiage_dict: buttons_verbiage_dict = self._get_default_buttons_verbiage_dicts_for_admin(req) all_papers = webapi.get_papers_by_person_id(self.person_id, ext_out=True) for paper in all_papers: records.append({'recid': paper[0], 'bibref': paper[1], 'flag': paper[2], 'authorname': paper[3], 'authoraffiliation': paper[4], 'paperdate': paper[5], 'rt_status': paper[6]}) rejected_papers = [row for row in records if row['flag'] < -1] rest_of_papers = [row for row in records if row['flag'] >= -1] review_needed = webapi.get_review_needing_records(self.person_id) if len(review_needed) < 1: if 'review' in show_tabs: show_tabs.remove('review') rt_tickets = None if open_tickets == None: open_tickets = webapi.get_person_request_ticket(self.person_id) else: if len(open_tickets) < 1: if 'tickets' in show_tabs: show_tabs.remove('tickets') if "admin_requested_ticket_id" in personinfo: rt_tickets = personinfo["admin_requested_ticket_id"] # Send data to template function tabs = TEMPLATE.tmpl_admin_tabs(ln, person_id=self.person_id, rejected_papers=rejected_papers, rest_of_papers=rest_of_papers, review_needed=review_needed, rt_tickets=rt_tickets, open_rt_tickets=open_tickets, show_tabs=show_tabs, ticket_links=ticket_links, verbiage_dict=verbiage_dict, buttons_verbiage_dict=buttons_verbiage_dict) return tabs def _get_default_verbiage_dicts_for_admin(self, req): session = get_session(req) personinfo = {} try: personinfo = session["personinfo"] except KeyError: return "" if 'ln' in personinfo: ln = personinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) verbiage_dict = {'confirmed': _('Papers'), 'repealed': _('Papers removed from this profile'), 'review': _('Papers in need of review'), 'tickets': _('Tickets'), 'data': _('Data'), 'confirmed_ns': _('Papers of this Person'), 'repealed_ns': _('Papers _not_ of this Person'), 'review_ns': _('Papers in need of review'), 'tickets_ns': _('Request Tickets'), 'data_ns': _('Additional Data for this Person')} return verbiage_dict def _get_default_buttons_verbiage_dicts_for_admin(self, req): session = get_session(req) personinfo = {} try: personinfo = session["personinfo"] except KeyError: return "" if 'ln' in personinfo: ln = personinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) buttons_verbiage_dict = {'mass_buttons': {'no_doc_string': _('Sorry, there are currently no documents to be found in this category.'), 'b_confirm': _('Yes, those papers are by this person.'), 'b_repeal': _('No, those papers are not by this person'), 'b_to_others': _('Assign to other person'), 'b_forget': _('Forget decision')}, 'record_undecided': {'alt_confirm': _('Confirm!'), 'confirm_text': _('Yes, this paper is by this person.'), 'alt_repeal': _('Rejected!'), 'repeal_text': _('No, this paper is not by this person'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}, 'record_confirmed': {'alt_confirm': _('Confirmed.'), 'confirm_text': _('Marked as this person\'s paper'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repeal!'), 'repeal_text': _('But it\'s not this person\'s paper.'), 'to_other_text': _('Assign to another 
person'), 'alt_to_other': _('To other person!')}, 'record_repealed': {'alt_confirm': _('Confirm!'), 'confirm_text': _('But it is this person\'s paper.'), 'alt_forget': _('Forget decision!'), 'forget_text': _('Forget decision.'), 'alt_repeal': _('Repealed'), 'repeal_text': _('Marked as not this person\'s paper'), 'to_other_text': _('Assign to another person'), 'alt_to_other': _('To other person!')}} return buttons_verbiage_dict def _generate_footer_guest(self, req, form, ln): return self._generate_footer_admin(req, form, ln) def _generate_footer_user(self, req, form, ln): return self._generate_footer_admin(req, form, ln) def _generate_footer_admin(self, req, form, ln): return TEMPLATE.tmpl_invenio_search_box() def _ticket_dispatch_guest(self, req): ''' Takes care of the ticket when in guest mode ''' return self._ticket_dispatch_user(req) def _ticket_dispatch_user(self, req): ''' Takes care of the ticket when in user and guest mode ''' session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] # ulevel = pinfo["ulevel"] ticket = pinfo["ticket"] bibref_check_required = self._ticket_review_bibref_check(req) if bibref_check_required: return bibref_check_required for t in ticket: t['status'] = webapi.check_transaction_permissions(uid, t['bibref'], t['pid'], t['action']) session.save() return self._ticket_final_review(req) def _ticket_dispatch_admin(self, req): ''' Takes care of the ticket when in administrator mode ''' session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] # ulevel = pinfo["ulevel"] ticket = pinfo["ticket"] bibref_check_required = self._ticket_review_bibref_check(req) if bibref_check_required: return bibref_check_required for t in ticket: t['status'] = webapi.check_transaction_permissions(uid, t['bibref'], t['pid'], t['action']) session.save() return self._ticket_final_review(req) def _ticket_review_bibref_check(self, req): ''' checks if some of the transactions on the ticket are needing a review. 
If it's the case prompts the user to select the right bibref ''' session = get_session(req) pinfo = session["personinfo"] ticket = pinfo["ticket"] needs_review = [] if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) if ("bibref_check_required" in pinfo and pinfo["bibref_check_required"] and "bibref_check_reviewed_bibrefs" in pinfo): if pinfo["bibref_check_reviewed_bibrefs"]: for rbibreft in pinfo["bibref_check_reviewed_bibrefs"]: if not rbibreft.count("||") or not rbibreft.count(","): continue rpid, rbibref = rbibreft.split("||") rrecid = rbibref.split(",")[1] rpid = webapi.wash_integer_id(rpid) for ticket_update in [row for row in ticket if (row['bibref'] == str(rrecid) and row['pid'] == rpid)]: ticket_update["bibref"] = rbibref del(ticket_update["incomplete"]) for ticket_remove in [row for row in ticket if ('incomplete' in row)]: ticket.remove(ticket_remove) if ("bibrefs_auto_assigned" in pinfo): del(pinfo["bibrefs_auto_assigned"]) if ("bibrefs_to_confirm" in pinfo): del(pinfo["bibrefs_to_confirm"]) del(pinfo["bibref_check_reviewed_bibrefs"]) pinfo["bibref_check_required"] = False session.save() return "" else: bibrefs_auto_assigned = {} bibrefs_to_confirm = {} # if ("bibrefs_auto_assigned" in pinfo # and pinfo["bibrefs_auto_assigned"]): # bibrefs_auto_assigned = pinfo["bibrefs_auto_assigned"] # # if ("bibrefs_to_confirm" in pinfo # and pinfo["bibrefs_to_confirm"]): # bibrefs_to_confirm = pinfo["bibrefs_to_confirm"] for transaction in ticket: if not webapi.is_valid_bibref(transaction['bibref']): transaction['incomplete'] = True needs_review.append(transaction) if not needs_review: pinfo["bibref_check_required"] = False session.save() return "" for transaction in needs_review: recid = webapi.wash_integer_id(transaction['bibref']) if recid < 0: continue #this doesn't look like a recid--discard! pid = transaction['pid'] if ((pid in bibrefs_auto_assigned and 'bibrecs' in bibrefs_auto_assigned[pid] and recid in bibrefs_auto_assigned[pid]['bibrecs']) or (pid in bibrefs_to_confirm and 'bibrecs' in bibrefs_to_confirm[pid] and recid in bibrefs_to_confirm[pid]['bibrecs'])): continue # we already assessed those bibrefs. 
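# Aside (sketch of the shape of the two accumulators filled just below; all
# values illustrative): each maps a person id to a display name, a canonical
# id placeholder, and a recid -> [(bibref, name), ...] mapping, which is the
# structure tmpl_bibref_check consumes:
_example_to_confirm = {
    42: {'person_name': 'Ellis, J.',
         'canonical_id': 'TBA',
         'bibrecs': {155: [('100:1442', 'Ellis, J.')]}},
}
assert _example_to_confirm[42]['bibrecs'][155][0][0] == '100:1442'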
fctptr = webapi.get_possible_bibrefs_from_pid_bibrec bibrec_refs = fctptr(pid, [recid]) person_name = webapi.get_most_frequent_name_from_pid(pid) for brr in bibrec_refs: if len(brr[1]) == 1: if not pid in bibrefs_auto_assigned: bibrefs_auto_assigned[pid] = { 'person_name': person_name, 'canonical_id': "TBA", 'bibrecs': {brr[0]: brr[1]}} else: bibrefs_auto_assigned[pid]['bibrecs'][brr[0]] = brr[1] else: if not brr[1]: tmp = webapi.get_bibrefs_from_bibrecs([brr[0]]) try: brr[1] = tmp[0][1] except IndexError: continue # No bibrefs on record--discard if not pid in bibrefs_to_confirm: bibrefs_to_confirm[pid] = { 'person_name': person_name, 'canonical_id': "TBA", 'bibrecs': {brr[0]: brr[1]}} else: bibrefs_to_confirm[pid]['bibrecs'][brr[0]] = brr[1] if bibrefs_to_confirm or bibrefs_auto_assigned: pinfo["bibref_check_required"] = True baa = deepcopy(bibrefs_auto_assigned) btc = deepcopy(bibrefs_to_confirm) for pid in baa: for rid in baa[pid]['bibrecs']: baa[pid]['bibrecs'][rid] = [] for pid in btc: for rid in btc[pid]['bibrecs']: btc[pid]['bibrecs'][rid] = [] pinfo["bibrefs_auto_assigned"] = baa pinfo["bibrefs_to_confirm"] = btc else: pinfo["bibref_check_required"] = False session.save() body = TEMPLATE.tmpl_bibref_check(bibrefs_auto_assigned, bibrefs_to_confirm) body = TEMPLATE.tmpl_person_detail_layout(body) metaheaderadd = self._scripts(kill_browser_cache=True) title = _("Submit Attribution Information") return page(title=title, metaheaderadd=metaheaderadd, body=body, req=req, language=ln) def _ticket_final_review(self, req): ''' displays the user what can/cannot finally be done, leaving the option of kicking some transactions from the ticket before commit ''' session = get_session(req) uid = getUid(req) userinfo = collect_user_info(uid) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] ticket = pinfo["ticket"] ticket = [row for row in ticket if not "execution_result" in row] skip_checkout_page = True upid = -1 user_first_name = "" user_first_name_sys = False user_last_name = "" user_last_name_sys = False user_email = "" user_email_sys = False if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) if ("external_firstname" in userinfo and userinfo["external_firstname"]): user_first_name = userinfo["external_firstname"] user_first_name_sys = True elif "user_first_name" in pinfo and pinfo["user_first_name"]: user_first_name = pinfo["user_first_name"] if ("external_familyname" in userinfo and userinfo["external_familyname"]): user_last_name = userinfo["external_familyname"] user_last_name_sys = True elif "user_last_name" in pinfo and pinfo["user_last_name"]: user_last_name = pinfo["user_last_name"] if ("email" in userinfo and not userinfo["email"] == "guest"): user_email = userinfo["email"] user_email_sys = True elif "user_email" in pinfo and pinfo["user_email"]: user_email = pinfo["user_email"] pinfo["user_first_name"] = user_first_name pinfo["user_first_name_sys"] = user_first_name_sys pinfo["user_last_name"] = user_last_name pinfo["user_last_name_sys"] = user_last_name_sys pinfo["user_email"] = user_email pinfo["user_email_sys"] = user_email_sys if "upid" in pinfo and pinfo["upid"]: upid = pinfo["upid"] else: dbpid = webapi.get_pid_from_uid(uid) if dbpid and dbpid[1]: if dbpid[0] and not dbpid[0] == -1: upid = dbpid[0][0] pinfo["upid"] = upid session.save() if not (user_first_name or user_last_name or user_email): skip_checkout_page = False if [row for row in ticket if row["status"] in ["denied", "warning_granted", "warning_denied"]]: skip_checkout_page = 
False if (not ticket or skip_checkout_page or ("checkout_confirmed" in pinfo and pinfo["checkout_confirmed"] and "checkout_faulty_fields" in pinfo and not pinfo["checkout_faulty_fields"])): self.adf['ticket_commit'][ulevel](req) if "checkout_confirmed" in pinfo: del(pinfo["checkout_confirmed"]) if "checkout_faulty_fields" in pinfo: del(pinfo["checkout_faulty_fields"]) if "bibref_check_required" in pinfo: del(pinfo["bibref_check_required"]) # if "user_ticket_comments" in pinfo: # del(pinfo["user_ticket_comments"]) session.save() return self._ticket_dispatch_end(req) for tt in list(ticket): if not 'bibref' in tt or not 'pid' in tt: del(ticket[tt]) continue tt['authorname_rec'] = webapi.get_bibref_name_string(tt['bibref']) tt['person_name'] = webapi.get_most_frequent_name_from_pid(tt['pid']) mark_yours = [] mark_not_yours = [] if upid >= 0: mark_yours = [row for row in ticket if (str(row["pid"]) == str(upid) and row["action"] in ["to_other_person", "confirm"])] mark_not_yours = [row for row in ticket if (str(row["pid"]) == str(upid) and row["action"] in ["repeal", "reset"])] mark_theirs = [row for row in ticket if ((not str(row["pid"]) == str(upid)) and row["action"] in ["to_other_person", "confirm"])] mark_not_theirs = [row for row in ticket if ((not str(row["pid"]) == str(upid)) and row["action"] in ["repeal", "reset"])] session.save() body = TEMPLATE.tmpl_ticket_final_review(req, mark_yours, mark_not_yours, mark_theirs, mark_not_theirs) body = TEMPLATE.tmpl_person_detail_layout(body) metaheaderadd = self._scripts(kill_browser_cache=True) title = _("Please review your actions") #body = body + '
<pre>' + pformat(pinfo) + '</pre>
' return page(title=title, metaheaderadd=metaheaderadd, body=body, req=req, language=ln) def _ticket_commit_admin(self, req): ''' Actual execution of the ticket transactions ''' self._clean_ticket(req) session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] ticket = pinfo["ticket"] userinfo = {'uid-ip': "%s||%s" % (uid, req.remote_ip)} if "user_ticket_comments" in pinfo: userinfo['comments'] = pinfo["user_ticket_comments"] if "user_first_name" in pinfo: userinfo['firstname'] = pinfo["user_first_name"] if "user_last_name" in pinfo: userinfo['lastname'] = pinfo["user_last_name"] if "user_email" in pinfo: userinfo['email'] = pinfo["user_email"] for t in ticket: t['execution_result'] = webapi.execute_action(t['action'], t['pid'], t['bibref'], uid, userinfo['uid-ip'], str(userinfo)) session.save() def _ticket_commit_user(self, req): ''' Actual execution of the ticket transactions ''' self._clean_ticket(req) session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] ticket = pinfo["ticket"] ok_tickets = [] userinfo = {'uid-ip': "%s||%s" % (uid, req.remote_ip)} if "user_ticket_comments" in pinfo: userinfo['comments'] = pinfo["user_ticket_comments"] if "user_first_name" in pinfo: userinfo['firstname'] = pinfo["user_first_name"] if "user_last_name" in pinfo: userinfo['lastname'] = pinfo["user_last_name"] if "user_email" in pinfo: userinfo['email'] = pinfo["user_email"] for t in list(ticket): if t['status'] in ['granted', 'warning_granted']: t['execution_result'] = webapi.execute_action(t['action'], t['pid'], t['bibref'], uid, userinfo['uid-ip'], str(userinfo)) ok_tickets.append(t) ticket.remove(t) else: webapi.create_request_ticket(userinfo, ticket) for t in ticket: t['execution_result'] = True ticket[:] = ok_tickets session.save() def _ticket_commit_guest(self, req): ''' Actual execution of the ticket transactions ''' self._clean_ticket(req) session = get_session(req) pinfo = session["personinfo"] uid = getUid(req) userinfo = {'uid-ip': "userid: %s (from %s)" % (uid, req.remote_ip)} if "user_ticket_comments" in pinfo: if pinfo["user_ticket_comments"]: userinfo['comments'] = pinfo["user_ticket_comments"] else: userinfo['comments'] = "No comments submitted." 
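# Sketch of the userinfo dict handed to webapi.create_request_ticket()
# below, assembled from the session fields above (example values assumed):
#   {'uid-ip': 'userid: 42 (from 127.0.0.1)', 'comments': '...',
#    'firstname': '...', 'lastname': '...', 'email': '...'}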
if "user_first_name" in pinfo: userinfo['firstname'] = pinfo["user_first_name"] if "user_last_name" in pinfo: userinfo['lastname'] = pinfo["user_last_name"] if "user_email" in pinfo: userinfo['email'] = pinfo["user_email"] ticket = pinfo['ticket'] webapi.create_request_ticket(userinfo, ticket) for t in ticket: t['execution_result'] = True session.save() def _ticket_dispatch_end(self, req): ''' The ticket dispatch is finished, redirect to the original page of origin or to the last_viewed_pid ''' session = get_session(req) pinfo = session["personinfo"] if 'claim_in_process' in pinfo: pinfo['claim_in_process'] = False uinfo = collect_user_info(req) uinfo['precached_viewclaimlink'] = True uid = getUid(req) set_user_preferences(uid, uinfo) if "referer" in pinfo and pinfo["referer"]: referer = pinfo["referer"] del(pinfo["referer"]) session.save() return redirect_to_url(req, referer) return redirect_to_url(req, "%s/person/%s" % (CFG_SITE_URL, webapi.get_person_redirect_link( pinfo["claimpaper_admin_last_viewed_pid"]))) def _clean_ticket(self, req): ''' Removes from a ticket the transactions with an execution_result flag ''' session = get_session(req) pinfo = session["personinfo"] ticket = pinfo["ticket"] for t in list(ticket): if 'execution_result' in t: ticket.remove(t) session.save() def __get_user_role(self, req): ''' Determines whether a user is guest, user or admin ''' minrole = 'guest' role = 'guest' if not req: return minrole uid = getUid(req) if not isinstance(uid, int): return minrole admin_role_id = acc_get_role_id(CLAIMPAPER_ADMIN_ROLE) user_role_id = acc_get_role_id(CLAIMPAPER_USER_ROLE) user_roles = acc_get_user_roles(uid) if admin_role_id in user_roles: role = 'admin' elif user_role_id in user_roles: role = 'user' if webapi.is_external_user(uid): role = 'user' return role def __user_is_authorized(self, req, action): ''' Determines if a given user is authorized to perform a specified action @param req: Apache Request Object @type req: Apache Request Object @param action: the action the user wants to perform @type action: string @return: True if user is allowed to perform the action, False if not @rtype: boolean ''' if not req: return False if not action: return False else: action = escape(action) uid = getUid(req) if not isinstance(uid, int): return False if uid == 0: return False allowance = [i[1] for i in acc_find_user_role_actions({'uid': uid}) if i[1] == action] if allowance: return True return False def _scripts(self, kill_browser_cache=False): ''' Returns html code to be included in the meta header of the html page. The actual code is stored in the template. 
@return: html formatted Javascript and CSS inclusions for the @rtype: string ''' return TEMPLATE.tmpl_meta_includes(kill_browser_cache) def _check_user_fields(self, req, form): argd = wash_urlargd( form, {'ln': (str, CFG_SITE_LANG), 'user_first_name': (str, None), 'user_last_name': (str, None), 'user_email': (str, None), 'user_comments': (str, None)}) session = get_session(req) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] skip_checkout_faulty_fields = False if ulevel in ['user', 'admin']: skip_checkout_faulty_fields = True if not ("user_first_name_sys" in pinfo and pinfo["user_first_name_sys"]): if "user_first_name" in argd: if not argd["user_first_name"] and not skip_checkout_faulty_fields: pinfo["checkout_faulty_fields"].append("user_first_name") else: pinfo["user_first_name"] = escape(argd["user_first_name"]) if not ("user_last_name_sys" in pinfo and pinfo["user_last_name_sys"]): if "user_last_name" in argd: if not argd["user_last_name"] and not skip_checkout_faulty_fields: pinfo["checkout_faulty_fields"].append("user_last_name") else: pinfo["user_last_name"] = escape(argd["user_last_name"]) if not ("user_email_sys" in pinfo and pinfo["user_email_sys"]): if "user_email" in argd: if (not argd["user_email"] or not email_valid_p(argd["user_email"])): pinfo["checkout_faulty_fields"].append("user_email") else: pinfo["user_email"] = escape(argd["user_email"]) if (ulevel == "guest" and emailUnique(argd["user_email"]) > 0): pinfo["checkout_faulty_fields"].append("user_email_taken") if "user_comments" in argd: if argd["user_comments"]: pinfo["user_ticket_comments"] = escape(argd["user_comments"]) else: pinfo["user_ticket_comments"] = "" session.save() def action(self, req, form): ''' Initial step in processing of requests: ticket generation/update. 
Also acts as action dispatcher for interface mass action requests Valid mass actions are: - confirm: confirm assignments to a person - repeal: repeal assignments from a person - reset: reset assignments of a person - cancel: clean the session (erase tickets and so on) - to_other_person: assign a document from a person to another person @param req: Apache Request Object @type req: Apache Request Object @param form: Parameters sent via GET or POST request @type form: dict @return: a full page formatted in HTML @return: string ''' self._session_bareinit(req) argd = wash_urlargd( form, {'ln': (str, CFG_SITE_LANG), 'pid': (int, None), 'confirm': (str, None), 'repeal': (str, None), 'reset': (str, None), 'cancel': (str, None), 'cancel_stage': (str, None), 'bibref_check_submit': (str, None), 'checkout': (str, None), 'checkout_continue_claiming': (str, None), 'checkout_submit': (str, None), 'checkout_remove_transaction': (str, None), 'to_other_person': (str, None), 'cancel_search_ticket': (str, None), 'user_first_name': (str, None), 'user_last_name': (str, None), 'user_email': (str, None), 'user_comments': (str, None), 'claim': (str, None), 'cancel_rt_ticket': (str, None), 'commit_rt_ticket': (str, None), 'rt_id': (int, None), 'rt_action': (str, None), 'selection': (list, []), 'set_canonical_name': (str, None), 'canonical_name': (str, None)}) ln = wash_language(argd['ln']) pid = None action = None bibrefs = None session = get_session(req) uid = getUid(req) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] ticket = pinfo["ticket"] tempticket = [] if not "ln" in pinfo: pinfo["ln"] = ln session.save() if 'confirm' in argd and argd['confirm']: action = 'confirm' elif 'repeal' in argd and argd['repeal']: action = 'repeal' elif 'reset' in argd and argd['reset']: action = 'reset' elif 'bibref_check_submit' in argd and argd['bibref_check_submit']: action = 'bibref_check_submit' elif 'cancel' in argd and argd['cancel']: action = 'cancel' elif 'cancel_stage' in argd and argd['cancel_stage']: action = 'cancel_stage' elif 'cancel_search_ticket' in argd and argd['cancel_search_ticket']: action = 'cancel_search_ticket' elif 'checkout' in argd and argd['checkout']: action = 'checkout' elif 'checkout_submit' in argd and argd['checkout_submit']: action = 'checkout_submit' elif ('checkout_remove_transaction' in argd and argd['checkout_remove_transaction']): action = 'checkout_remove_transaction' elif ('checkout_continue_claiming' in argd and argd['checkout_continue_claiming']): action = "checkout_continue_claiming" elif 'cancel_rt_ticket' in argd and argd['cancel_rt_ticket']: action = 'cancel_rt_ticket' elif 'commit_rt_ticket' in argd and argd['commit_rt_ticket']: action = 'commit_rt_ticket' elif 'to_other_person' in argd and argd['to_other_person']: action = 'to_other_person' elif 'claim' in argd and argd['claim']: action = 'claim' elif 'set_canonical_name' in argd and argd['set_canonical_name']: action = 'set_canonical_name' no_access = self._page_access_permission_wall(req, pid) if no_access and not action in ["claim"]: return no_access if action in ['to_other_person', 'claim']: if 'selection' in argd and len(argd['selection']) > 0: bibrefs = argd['selection'] else: return self._error_page(req, ln, "Fatal: cannot create ticket without any bibrefrec") if action == 'claim': return self._ticket_open_claim(req, bibrefs, ln) else: return self._ticket_open_assign_to_other_person(req, bibrefs, form) if action in ["cancel_stage"]: if 'bibref_check_required' in pinfo: del(pinfo['bibref_check_required']) if 
'bibrefs_auto_assigned' in pinfo: del(pinfo['bibrefs_auto_assigned']) if 'bibrefs_to_confirm' in pinfo: del(pinfo['bibrefs_to_confirm']) for tt in [row for row in ticket if 'incomplete' in row]: ticket.remove(tt) session.save() return self._ticket_dispatch_end(req) if action in ["checkout_submit"]: pinfo["checkout_faulty_fields"] = [] self._check_user_fields(req, form) if not ticket: pinfo["checkout_faulty_fields"].append("tickets") if pinfo["checkout_faulty_fields"]: pinfo["checkout_confirmed"] = False else: pinfo["checkout_confirmed"] = True session.save() return self.adf['ticket_dispatch'][ulevel](req) #return self._ticket_final_review(req) if action in ["checkout_remove_transaction"]: bibref = argd['checkout_remove_transaction'] if webapi.is_valid_bibref(bibref): for rmt in [row for row in ticket if row["bibref"] == bibref]: ticket.remove(rmt) pinfo["checkout_confirmed"] = False session.save() return self.adf['ticket_dispatch'][ulevel](req) #return self._ticket_final_review(req) if action in ["checkout_continue_claiming"]: pinfo["checkout_faulty_fields"] = [] self._check_user_fields(req, form) return self._ticket_dispatch_end(req) if (action in ['bibref_check_submit'] or (not action and "bibref_check_required" in pinfo and pinfo["bibref_check_required"])): if not action in ['bibref_check_submit']: if "bibref_check_reviewed_bibrefs" in pinfo: del(pinfo["bibref_check_reviewed_bibrefs"]) session.save() return self.adf['ticket_dispatch'][ulevel](req) pinfo["bibref_check_reviewed_bibrefs"] = [] add_rev = pinfo["bibref_check_reviewed_bibrefs"].append if ("bibrefs_auto_assigned" in pinfo or "bibrefs_to_confirm" in pinfo): person_reviews = [] if ("bibrefs_auto_assigned" in pinfo and pinfo["bibrefs_auto_assigned"]): person_reviews.append(pinfo["bibrefs_auto_assigned"]) if ("bibrefs_to_confirm" in pinfo and pinfo["bibrefs_to_confirm"]): person_reviews.append(pinfo["bibrefs_to_confirm"]) for ref_review in person_reviews: for person_id in ref_review: for bibrec in ref_review[person_id]["bibrecs"]: rec_grp = "bibrecgroup%s" % bibrec elements = [] if rec_grp in form: if isinstance(form[rec_grp], str): elements.append(form[rec_grp]) elif isinstance(form[rec_grp], list): elements += form[rec_grp] else: continue for element in elements: test = element.split("||") if test and len(test) > 1 and test[1]: tref = test[1] + "," + str(bibrec) tpid = webapi.wash_integer_id(test[0]) if (webapi.is_valid_bibref(tref) and tpid > -1): add_rev(element + "," + str(bibrec)) session.save() return self.adf['ticket_dispatch'][ulevel](req) if not action: return self._error_page(req, ln, "Fatal: cannot create ticket if no action selected.") if action in ['confirm', 'repeal', 'reset']: if 'pid' in argd: pid = argd['pid'] else: return self._error_page(req, ln, "Fatal: cannot create ticket without a person id!") if 'selection' in argd and len(argd['selection']) > 0: bibrefs = argd['selection'] else: if pid == -3: return self._error_page(req, ln, "Fatal: Please select a paper to assign to the new person first!") else: return self._error_page(req, ln, "Fatal: cannot create ticket without any paper selected!") if 'rt_id' in argd and argd['rt_id']: rt_id = argd['rt_id'] for b in bibrefs: self._cancel_transaction_from_rt_ticket(rt_id, pid, action, b) #create temporary ticket if pid == -3: pid = webapi.create_new_person(uid) for bibref in bibrefs: tempticket.append({'pid': pid, 'bibref': bibref, 'action': action}) #check if ticket targets (bibref for pid) are already in ticket for t in tempticket: for e in list(ticket): if e['pid'] 
== t['pid'] and e['bibref'] == t['bibref']: ticket.remove(e) ticket.append(t) if 'search_ticket' in pinfo: del(pinfo['search_ticket']) session.save() #start ticket processing chain pinfo["claimpaper_admin_last_viewed_pid"] = pid return self.adf['ticket_dispatch'][ulevel](req) # return self.perform(req, form) elif action in ['cancel']: self.__session_cleanup(req) # return self._error_page(req, ln, # "Not an error! Session cleaned! but " # "redirect to be implemented") return self._ticket_dispatch_end(req) elif action in ['cancel_search_ticket']: if 'search_ticket' in pinfo: del(pinfo['search_ticket']) session.save() if "claimpaper_admin_last_viewed_pid" in pinfo: pid = pinfo["claimpaper_admin_last_viewed_pid"] return redirect_to_url(req, "/person/%s" % webapi.get_person_redirect_link(pid)) return self.search(req, form) elif action in ['checkout']: return self.adf['ticket_dispatch'][ulevel](req) #return self._ticket_final_review(req) elif action in ['cancel_rt_ticket', 'commit_rt_ticket']: if 'selection' in argd and len(argd['selection']) > 0: bibref = argd['selection'] else: return self._error_page(req, ln, "Fatal: cannot cancel unknown ticket") if 'pid' in argd and argd['pid'] > -1: pid = argd['pid'] else: return self._error_page(req, ln, "Fatal: cannot cancel unknown ticket") if action == 'cancel_rt_ticket': if 'rt_id' in argd and argd['rt_id'] and 'rt_action' in argd and argd['rt_action']: rt_id = argd['rt_id'] rt_action = argd['rt_action'] if 'selection' in argd and len(argd['selection']) > 0: bibrefs = argd['selection'] else: return self._error_page(req, ln, "Fatal: no bibref") for b in bibrefs: self._cancel_transaction_from_rt_ticket(rt_id, pid, rt_action, b) return redirect_to_url(req, "/person/%s" % webapi.get_person_redirect_link(pid)) return self._cancel_rt_ticket(req, bibref[0], pid) elif action == 'commit_rt_ticket': return self._commit_rt_ticket(req, bibref[0], pid) elif action == 'set_canonical_name': if 'pid' in argd and argd['pid'] > -1: pid = argd['pid'] else: return self._error_page(req, ln, "Fatal: cannot set canonical name to unknown person") if 'canonical_name' in argd and argd['canonical_name']: cname = argd['canonical_name'] else: return self._error_page(req, ln, "Fatal: cannot set a custom canonical name without a suggestion") uid = getUid(req) userinfo = "%s||%s" % (uid, req.remote_ip) webapi.update_person_canonical_name(pid, cname, userinfo) return redirect_to_url(req, "/person/%s" % webapi.get_person_redirect_link(pid)) else: return self._error_page(req, ln, "Fatal: What were I supposed to do?") def _ticket_open_claim(self, req, bibrefs, ln): ''' Generate page to let user choose how to proceed @param req: Apache Request Object @type req: Apache Request Object @param bibrefs: list of record IDs to perform an action on @type bibrefs: list of int @param ln: language to display the page in @type ln: string ''' session = get_session(req) uid = getUid(req) uinfo = collect_user_info(req) pinfo = session["personinfo"] if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) no_access = self._page_access_permission_wall(req) session.save() pid = -1 search_enabled = True if not no_access and uinfo["precached_usepaperclaim"]: tpid = webapi.get_pid_from_uid(uid) if tpid and tpid[0] and tpid[1] and tpid[0][0]: pid = tpid[0][0] if (not no_access and "claimpaper_admin_last_viewed_pid" in pinfo and pinfo["claimpaper_admin_last_viewed_pid"]): names = webapi.get_person_names_from_id(pinfo["claimpaper_admin_last_viewed_pid"]) names = sorted([i for i 
in names], key=lambda k: k[1], reverse=True) if len(names) > 0: if len(names[0]) > 0: last_viewed_pid = [pinfo["claimpaper_admin_last_viewed_pid"], names[0][0]] else: last_viewed_pid = False else: last_viewed_pid = False else: last_viewed_pid = False if no_access: search_enabled = False pinfo["referer"] = uinfo["referer"] session.save() body = TEMPLATE.tmpl_open_claim(bibrefs, pid, last_viewed_pid, search_enabled=search_enabled) body = TEMPLATE.tmpl_person_detail_layout(body) title = _('Claim this paper') metaheaderadd = self._scripts(kill_browser_cache=True) return page(title=title, metaheaderadd=metaheaderadd, body=body, req=req, language=ln) def _ticket_open_assign_to_other_person(self, req, bibrefs, form): ''' Initializes search to find a person to attach the selected records to @param req: Apache request object @type req: Apache request object @param bibrefs: list of record IDs to consider @type bibrefs: list of int @param form: GET/POST request parameters @type form: dict ''' session = get_session(req) pinfo = session["personinfo"] pinfo["search_ticket"] = dict() search_ticket = pinfo["search_ticket"] search_ticket['action'] = 'confirm' search_ticket['bibrefs'] = bibrefs session.save() return self.search(req, form) def comments(self, req, form): return "" def _cancel_rt_ticket(self, req, tid, pid): ''' deletes an RT ticket ''' webapi.delete_request_ticket(pid, tid) return redirect_to_url(req, "/person/%s" % webapi.get_person_redirect_link(str(pid))) def _cancel_transaction_from_rt_ticket(self, tid, pid, action, bibref): ''' deletes a transaction from an rt ticket ''' webapi.delete_transaction_from_request_ticket(pid, tid, action, bibref) def _commit_rt_ticket(self, req, bibref, pid): ''' Commit of an rt ticket: creates a real ticket and commits. ''' session = get_session(req) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] ticket = pinfo["ticket"] open_rt_tickets = webapi.get_person_request_ticket(pid) tic = [a for a in open_rt_tickets if str(a[1]) == str(bibref)] if len(tic) > 0: tic = tic[0][0] #create temporary ticket tempticket = [] for t in tic: if t[0] in ['confirm', 'repeal']: tempticket.append({'pid': pid, 'bibref': t[1], 'action': t[0]}) #check if ticket targets (bibref for pid) are already in ticket for t in tempticket: for e in list(ticket): if e['pid'] == t['pid'] and e['bibref'] == t['bibref']: ticket.remove(e) ticket.append(t) session.save() #start ticket processing chain webapi.delete_request_ticket(pid, bibref) return self.adf['ticket_dispatch'][ulevel](req) def _error_page(self, req, ln=CFG_SITE_LANG, message=None, intro=True): ''' Create a page that contains a message explaining the error. @param req: Apache Request Object @type req: Apache Request Object @param ln: language @type ln: string @param message: message to be displayed @type message: string ''' body = [] _ = gettext_set_language(ln) if not message: message = "No further explanation available. Sorry." if intro: body.append(_("
<p>We're sorry. An error occurred while " "handling your request. Please find more information " "below:</p>")) body.append("<p><strong>%s</strong></p>
" % message) return page(title=_("Notice"), body="\n".join(body), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % CFG_SITE_NAME, language=ln, req=req) def __session_cleanup(self, req): ''' Cleans the session from all bibauthorid specific settings and with that cancels any transaction currently in progress. @param req: Apache Request Object @type req: Apache Request Object ''' session = get_session(req) try: pinfo = session["personinfo"] except KeyError: return if "ticket" in pinfo: pinfo['ticket'] = [] if "search_ticket" in pinfo: pinfo['search_ticket'] = dict() # clear up bibref checker if it's done. if ("bibref_check_required" in pinfo and not pinfo["bibref_check_required"]): if 'bibrefs_to_confirm' in pinfo: del(pinfo['bibrefs_to_confirm']) if "bibrefs_auto_assigned" in pinfo: del(pinfo["bibrefs_auto_assigned"]) del(pinfo["bibref_check_required"]) if "checkout_confirmed" in pinfo: del(pinfo["checkout_confirmed"]) if "checkout_faulty_fields" in pinfo: del(pinfo["checkout_faulty_fields"]) #pinfo['ulevel'] = ulevel # pinfo["claimpaper_admin_last_viewed_pid"] = -1 pinfo["admin_requested_ticket_id"] = -1 session.save() def _generate_search_ticket_box(self, req): ''' Generate the search ticket to remember a pending search for Person entities in an attribution process @param req: Apache request object @type req: Apache request object ''' session = get_session(req) pinfo = session["personinfo"] search_ticket = None if 'ln' in pinfo: ln = pinfo["ln"] else: ln = CFG_SITE_LANG _ = gettext_set_language(ln) if 'search_ticket' in pinfo: search_ticket = pinfo['search_ticket'] if not search_ticket: return '' else: teaser = _('Person search for assignment in progress!') message = _('You are searching for a person to assign the following papers:') return TEMPLATE.tmpl_search_ticket_box(teaser, message, search_ticket) def search(self, req, form, is_fallback=False, fallback_query='', fallback_title='', fallback_message=''): ''' Function used for searching a person based on a name with which the function is queried. 
@param req: Apache Request Object @type req: Apache Request Object @param form: Parameters sent via GET or POST request @type form: dict @return: a full page formatted in HTML @rtype: string ''' self._session_bareinit(req) session = get_session(req) no_access = self._page_access_permission_wall(req) new_person_link = False if no_access: return no_access pinfo = session["personinfo"] search_ticket = None if 'search_ticket' in pinfo: search_ticket = pinfo['search_ticket'] if "ulevel" in pinfo: if pinfo["ulevel"] == "admin": new_person_link = True body = '' if search_ticket: body = body + self._generate_search_ticket_box(req) max_num_show_papers = 5 argd = wash_urlargd( form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0), 'q': (str, None)}) ln = wash_language(argd['ln']) query = None recid = None nquery = None search_results = None title = "Person Search" if 'q' in argd: if argd['q']: query = escape(argd['q']) if is_fallback and fallback_query: query = fallback_query if query: authors = [] if query.count(":"): try: left, right = query.split(":") try: recid = int(left) nquery = str(right) except (ValueError, TypeError): try: recid = int(right) nquery = str(left) except (ValueError, TypeError): recid = None nquery = query except ValueError: recid = None nquery = query else: nquery = query sorted_results = webapi.search_person_ids_by_name(nquery) for index, results in enumerate(sorted_results): pid = results[0] # authorpapers = webapi.get_papers_by_person_id(pid, -1) # authorpapers = sorted(authorpapers, key=itemgetter(0), # reverse=True) if index < bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: authorpapers = [[paper] for paper in sort_records(None, [i[0] for i in webapi.get_papers_by_person_id(pid, -1)], sort_field="year", sort_order="a")] else: authorpapers = [['Not retrieved to increase performance.']] if (recid and not (str(recid) in [row[0] for row in authorpapers])): continue authors.append([results[0], results[1], authorpapers[0:max_num_show_papers]]) search_results = authors if recid and (len(search_results) == 1) and not is_fallback: return redirect_to_url(req, "/person/%s" % search_results[0][0]) body = body + TEMPLATE.tmpl_author_search(query, search_results, search_ticket, author_pages_mode=True, fallback_mode=is_fallback, fallback_title=fallback_title, fallback_message=fallback_message, new_person_link=new_person_link) if not is_fallback: body = TEMPLATE.tmpl_person_detail_layout(body) return page(title=title, metaheaderadd=self._scripts(kill_browser_cache=True), body=body, req=req, language=ln) def welcome(self, req, form): ''' Generate SSO landing/welcome page @param req: Apache request object @type req: Apache request object @param form: GET/POST request params @type form: dict ''' uid = getUid(req) self._session_bareinit(req) argd = wash_urlargd( form, {'ln': (str, CFG_SITE_LANG)}) ln = wash_language(argd['ln']) _ = gettext_set_language(ln) if uid == 0: return page_not_authorized(req, text=_("This page is not accessible directly.")) title_message = _('Welcome!') # start continuous writing to the browser... req.content_type = "text/html" req.send_http_header() req.write(pageheaderonly(req=req, title=title_message, language=ln)) req.write(TEMPLATE.tmpl_welcome_start()) body = "" if CFG_INSPIRE_SITE: body = TEMPLATE.tmpl_welcome_arxiv() else: body = TEMPLATE.tmpl_welcome() req.write(body) # now do what will take time...
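# webapi.arxiv_login() presumably talks to the remote SSO/arXiv side and can
# take a while; the page header was already flushed to the browser above, so
# the user keeps seeing output while this call runs.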
pid = webapi.arxiv_login(req) # session must be read after webapi.arxiv_login did its stuff session = get_session(req) pinfo = session["personinfo"] pinfo["claimpaper_admin_last_viewed_pid"] = pid session.save() link = TEMPLATE.tmpl_welcome_link() req.write(link) req.write(TEMPLATE.tmpl_welcome_end()) req.write(pagefooteronly(req=req)) def tickets_admin(self, req, form): ''' Generate the page listing all open RT tickets (admin only) @param req: Apache request object @type req: Apache request object @param form: GET/POST request params @type form: dict ''' self._session_bareinit(req) no_access = self._page_access_permission_wall(req, req_level='admin') if no_access: return no_access tickets = webapi.get_persons_with_open_tickets_list() tickets = list(tickets) for t in list(tickets): tickets.remove(t) tickets.append([webapi.get_most_frequent_name_from_pid(int(t[0])), webapi.get_person_redirect_link(t[0]), t[0], t[1]]) body = TEMPLATE.tmpl_tickets_admin(tickets) body = TEMPLATE.tmpl_person_detail_layout(body) title = 'Open RT tickets' return page(title=title, metaheaderadd=self._scripts(), body=body, req=req) def export(self, req, form): ''' Generate JSONized export of Person data @param req: Apache request object @type req: Apache request object @param form: GET/POST request params @type form: dict ''' argd = wash_urlargd( form, {'ln': (str, CFG_SITE_LANG), 'request': (str, None), 'userid': (str, None)}) if not JSON_OK: return "500_json_not_found__install_package" # session = get_session(req) # ln = wash_language(argd['ln']) request = None userid = None if "userid" in argd and argd['userid']: userid = argd['userid'] else: return "404_user_not_found" if "request" in argd and argd['request']: request = argd["request"] # find user from ID user_email = get_email_from_username(userid) if user_email == userid: return "404_user_not_found" uid = get_uid_from_email(user_email) uinfo = collect_user_info(uid) # find person by uid pid = webapi.get_pid_from_uid(uid) # find papers by pid that are confirmed through a human. papers = webapi.get_papers_by_person_id(pid, 2) # filter by request param, e.g. arxiv if not request: return "404__no_filter_selected" if not request in bconfig.VALID_EXPORT_FILTERS: return "500_filter_invalid" if request == "arxiv": query = "(recid:" query += " OR recid:".join(papers) query += ") AND 037:arxiv" db_docs = perform_request_search(p=query) nickmail = "" nickname = "" db_arxiv_ids = [] try: nickname = uinfo["nickname"] except KeyError: pass if not nickname: try: nickmail = uinfo["email"] except KeyError: nickmail = user_email nickname = nickmail db_arxiv_ids = get_fieldvalues(db_docs, "037__a") construct = {"nickname": nickname, "claims": ";".join(db_arxiv_ids)} jsondmp = json.dumps(construct) signature = webapi.sign_assertion("arXiv", jsondmp) construct["digest"] = signature return json.dumps(construct) index = __call__ me = welcome you = welcome # pylint: enable=C0301 # pylint: enable=W0613 diff --git a/modules/bibcirculation/lib/bibcirculation_daemon.py b/modules/bibcirculation/lib/bibcirculation_daemon.py index b5dbc3ccf..f065c046a 100644 --- a/modules/bibcirculation/lib/bibcirculation_daemon.py +++ b/modules/bibcirculation/lib/bibcirculation_daemon.py @@ -1,190 +1,190 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN.
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibCirculation daemon. """ __revision__ = "$Id$" import sys import datetime import time from invenio.dbquery import run_sql from invenio.bibtask import task_init from invenio.mailutils import send_email import invenio.bibcirculation_dblayer as db from invenio.bibcirculation_config import CFG_BIBCIRCULATION_TEMPLATES, \ CFG_BIBCIRCULATION_LIBRARIAN_EMAIL -from invenio.search_engine import get_fieldvalues +from invenio.search_engine_utils import get_fieldvalues from invenio.bibcirculation_utils import generate_email_body def get_expired_loan(): """ @return all expired loans """ res = run_sql("""select id_crcBORROWER, id, id_bibrec from crcLOAN where status = 'on loan' and due_date < NOW() """) return res def update_expired_loan(loan_id): """ Update status, number of overdue letter and date of overdue letter @param loan_id: identify the loan. Primary key of crcLOAN. @type loan_id: int """ run_sql("""update crcLOAN set overdue_letter_number = overdue_letter_number + 1, status = 'expired', overdue_letter_date = NOW() where id = %s """, (loan_id, )) def get_overdue_letters_info(loan_id): """ Get the number of letters and the date of the last letter sent for a given loan_id. @param loan_id: identify the loan. Primary of crcLOAN. @type loan_id: int @return number_of_letters and date of the last letter """ res = run_sql("""select overdue_letter_number, DATE_FORMAT(overdue_letter_date,'%%Y-%%m-%%d') from crcLOAN where id=%s""", (loan_id, )) return res[0] def send_overdue_letter(borrower_id, subject, content): """ Send an overdue letter @param borrower_id: identify the borrower. Primary key of crcBORROWER. @type borrower_id: int @param subject: subject of the overdue letter @type subject: string """ to_borrower = db.get_borrower_email(borrower_id) send_email(fromaddr=CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, toaddr=to_borrower, subject=subject, content=content, header='', footer='', attempt_times=1, attempt_sleeptime=10 ) return 1 def send_second_recall(date_letters): """ @param date_letters: date of the last letter. @type date_letters: string @return boolean """ today = datetime.date.today() time_tuple = time.strptime(date_letters, "%Y-%m-%d") #datetime.strptime(date_letters, "%Y-%m-%d") doesn't work (only on 2.5). tmp_date = datetime.datetime(*time_tuple[0:3]) + datetime.timedelta(weeks=1) if tmp_date.strftime("%Y-%m-%d") == today.strftime("%Y-%m-%d"): return True else: return False def send_third_recall(date_letters): """ @param date_letters: date of the last letter. @type date_letters: string @return boolean """ today = datetime.date.today() time_tuple = time.strptime(date_letters, "%Y-%m-%d") #datetime.strptime(date_letters, "%Y-%m-%d") doesn't work (only on 2.5). 
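# Illustrative sketch of the Python 2.4-compatible conversion used below
# (struct_time -> datetime, then shift by three days; example date assumed):
#   >>> tt = time.strptime("2011-05-02", "%Y-%m-%d")
#   >>> datetime.datetime(*tt[0:3]) + datetime.timedelta(days=3)
#   datetime.datetime(2011, 5, 5, 0, 0)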
tmp_date = datetime.datetime(*time_tuple[0:3]) + datetime.timedelta(days=3) if tmp_date.strftime("%Y-%m-%d") == today.strftime("%Y-%m-%d"): return True else: return False def task_run_core(): """ run daemon """ #write_message("Getting expired loans ...", verbose=9) expired_loans = get_expired_loan() for (borrower_id, loan_id, recid) in expired_loans: (number_of_letters, date_letters) = get_overdue_letters_info(loan_id) if number_of_letters == 0: content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL1'], loan_id) elif number_of_letters == 1 and send_second_recall(date_letters): content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL2'], loan_id) elif number_of_letters == 2 and send_third_recall(date_letters): content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id) else: content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id) title = ''.join(get_fieldvalues(recid, "245__a")) subject = "LOAN RECALL: " + title update_expired_loan(loan_id) #write_message("Updating information about expired loans") send_overdue_letter(borrower_id, subject, content) #write_message("Sending overdue letter") #write_message("Done!!") return 1 def main(): """ main() """ task_init(authorization_action='runbibcirculation', authorization_msg="BibCirculation Task Submission", description="""Examples: %s -u admin """ % (sys.argv[0],), version=__revision__, task_run_fnc = task_run_core) if __name__ == '__main__': main() diff --git a/modules/bibcirculation/lib/bibcirculation_utils.py b/modules/bibcirculation/lib/bibcirculation_utils.py index fdad98d17..bd8ceb253 100644 --- a/modules/bibcirculation/lib/bibcirculation_utils.py +++ b/modules/bibcirculation/lib/bibcirculation_utils.py @@ -1,691 +1,691 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibCirculation Utils: Auxiliary methods of BibCirculation """ __revision__ = "$Id$" -from invenio.search_engine import get_fieldvalues +from invenio.search_engine_utils import get_fieldvalues from invenio.bibtask import task_low_level_submission import invenio.bibcirculation_dblayer as db from invenio.urlutils import create_html_link from invenio.config import CFG_SITE_URL, CFG_TMPDIR from invenio.bibcirculation_config import CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY, \ CFG_BIBCIRCULATION_WORKING_DAYS, \ CFG_BIBCIRCULATION_HOLIDAYS from invenio.messages import gettext_set_language import datetime, time def hold_request_mail(recid, borrower_id): """ Create the mail who will be sent for each hold requests. @param recid: identify the record. Primary key of bibrec. @type recid: int @param borrower_id: identify the borrower. Primary key of crcBORROWER. 
@type borrower_id: int @return email(body) """ (book_title, book_year, book_author, book_isbn, book_editor) = book_information_from_MARC(recid) ############## need some code refactoring ############### more_holdings_infos = db.get_holdings_details(recid) borrower_infos = db.get_borrower_details(borrower_id) ######################################################### title_link = create_html_link(CFG_SITE_URL + '/admin/bibcirculation/bibcirculationadmin.py/get_item_details', {'recid': recid}, (book_title)) out = """ This is an automatic email for confirming the hold request for a book on behalf of: %s (email: %s) title: %s author: %s location: %s library: %s publisher: %s year: %s isbn: %s """ % (borrower_infos[1], borrower_infos[2], title_link, book_author, more_holdings_infos[0][1], more_holdings_infos[0][2], book_editor, book_year, book_isbn) return out def get_book_cover(isbn): """ Retrieve book cover using Amazon web services. @param isbn: book's isbn @type isbn: string @return book cover """ from xml.dom import minidom import urllib # connect to AWS cover_xml = urllib.urlopen('http://ecs.amazonaws.com/onca/xml' \ '?Service=AWSECommerceService&AWSAccessKeyId=' \ + CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY + \ '&Operation=ItemSearch&Condition=All&' \ 'ResponseGroup=Images&SearchIndex=Books&' \ 'Keywords=' + isbn) # parse XML try: xml_img = minidom.parse(cover_xml) retrieve_book_cover = xml_img.getElementsByTagName('MediumImage') book_cover = retrieve_book_cover.item(0).firstChild.firstChild.data except AttributeError: book_cover = "%s/img/book_cover_placeholder.gif" % (CFG_SITE_URL) return book_cover def book_information_from_MARC(recid): """ Retrieve book's information from MARC @param recid: identify the record. Primary key of bibrec. @type recid: int @return tuple with title, year, author, isbn and editor. """ book_title = ' '.join(get_fieldvalues(recid, "245__a") + \ get_fieldvalues(recid, "245__b") + \ get_fieldvalues(recid, "245__n") + \ get_fieldvalues(recid, "245__p")) book_year = ' '.join(get_fieldvalues(recid, "260__c")) book_author = ' '.join(get_fieldvalues(recid, "100__a") + \ get_fieldvalues(recid, "100__u")) book_isbn = ' '.join(get_fieldvalues(recid, "020__a")) book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \ get_fieldvalues(recid, "260__b")) return (book_title, book_year, book_author, book_isbn, book_editor) def book_title_from_MARC(recid): """ Retrieve book's title from MARC @param recid: identify the record. Primary key of bibrec. @type recid: int @return book's title """ book_title = ' '.join(get_fieldvalues(recid, "245__a") + \ get_fieldvalues(recid, "245__b") + \ get_fieldvalues(recid, "245__n") + \ get_fieldvalues(recid, "245__p")) return book_title def update_status_if_expired(loan_id): """ Update the loan's status if status is 'expired'. @param loan_id: identify the loan. Primary key of crcLOAN. @type loan_id: int """ loan_status = db.get_loan_status(loan_id) if loan_status == 'expired': db.update_loan_status('on loan', loan_id) return def get_next_day(date_string): """ Get the next day @param date_string: date @type date_string: string return next day """ # add 1 day more_1_day = datetime.timedelta(days=1) # convert date_string to datetime format tmp_date = time.strptime(date_string, '%Y-%m-%d') # calculate the new date (next day) next_day = datetime.datetime(*tmp_date[:3]) + more_1_day return next_day def generate_new_due_date(days): """ Generate a new due date (today + X days = new due date). 
@param days: number of days @type days: string @return new due date """ today = datetime.date.today() more_X_days = datetime.timedelta(days=days) tmp_date = today + more_X_days week_day = tmp_date.strftime('%A') due_date = tmp_date.strftime('%Y-%m-%d') due_date_validated = False while not due_date_validated: if week_day in CFG_BIBCIRCULATION_WORKING_DAYS and due_date not in CFG_BIBCIRCULATION_HOLIDAYS: due_date_validated = True else: next_day = get_next_day(due_date) due_date = next_day.strftime('%Y-%m-%d') week_day = next_day.strftime('%A') return due_date def renew_loan_for_X_days(barcode): """ Renew a loan based on its loan period @param barcode: identify the item. Primary key of crcITEM. @type barcode: string @return new due date """ loan_period = db.get_loan_period(barcode) if loan_period == '4 weeks': due_date = generate_new_due_date(30) else: due_date = generate_new_due_date(7) return due_date def make_copy_available(request_id): """ Change the status of a copy for 'available' when an hold request was cancelled. @param request_id: identify the request: Primary key of crcLOANREQUEST @type request_id: int """ barcode_requested = db.get_requested_barcode(request_id) db.update_item_status('available', barcode_requested) return def print_new_loan_information(req, ln): """ Create a printable format with the information of the last loan who has been registered on the table crcLOAN. """ _ = gettext_set_language(ln) # get the last loan from crcLOAN (recid, borrower_id, due_date) = db.get_last_loan() # get book's information (book_title, book_year, book_author, book_isbn, book_editor) = book_information_from_MARC(recid) # get borrower's data/information (name, address, email) (borrower_name, borrower_address, borrower_email) = db.get_borrower_data(borrower_id) # Generate printable format req.content_type = "text/html" req.send_http_header() out = """""" out += """
<!-- printable page header; site: %s -->""" % (CFG_SITE_URL)

    out += """<h2 align="center">%s</h2>""" % (_("Loan information"))

    out += """<p align="center"><strong>%s</strong></p>""" % (_("This book is sent to you ..."))

    out += """<table border="1" cellpadding="5">"""
    out += """<tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>""" % (_("Title"), book_title,
                                                    _("Author"), book_author,
                                                    _("Editor"), book_editor,
                                                    _("ISBN"), book_isbn,
                                                    _("Year"), book_year)
    out += """</table><br />"""

    out += """<table border="1" cellpadding="5">"""
    out += """<tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>
              <tr><td>%s</td><td>%s</td></tr>""" % (_("Id"), borrower_id,
                                                    _("Name"), borrower_name,
                                                    _("Address"), borrower_address,
                                                    _("Email"), borrower_email)
    out += """</table><br />"""

    out += """<p><strong>%s: %s</strong></p>""" % (_("Due date"), due_date)

    out += """<br /><br />
""" req.write("") req.write(out) req.write("") return "\n" def print_pending_hold_requests_information(req, ln): """ Create a printable format with all the information about all pending hold requests. """ _ = gettext_set_language(ln) requests = db.get_pdf_request_data('pending') req.content_type = "text/html" req.send_http_header() out = """""" out += """
<!-- printable page header; site: %s -->""" % (CFG_SITE_URL)

    out += """<h2 align="center">%s</h2>""" % (_("List of pending hold requests"))

    out += """<p align="center">%s</p>""" % (time.ctime())

    out += """<table border="1" cellpadding="5">"""
    out += """<tr><th>%s</th><th>%s</th><th>%s</th><th>%s</th>
                  <th>%s</th><th>%s</th><th>%s</th></tr>""" % (_("Borrower"),
                                                               _("Item"),
                                                               _("Library"),
                                                               _("Location"),
                                                               _("From"),
                                                               _("To"),
                                                               _("Request date"))

    for (recid, borrower_name, library_name, location, date_from, date_to,
         request_date) in requests:
        out += """<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td>
                      <td>%s</td><td>%s</td><td>%s</td></tr>""" % (
            borrower_name, book_title_from_MARC(recid), library_name,
            location, date_from, date_to, request_date)

    out += """</table>
""" req.write("") req.write(out) req.write("") return "\n" def get_item_info_for_search_result(recid): """ Get the item's info from MARC in order to create a search result with more details @param recid: identify the record. Primary key of bibrec. @type recid: int @return book's informations (author, editor and number of copies) """ book_author = ' '.join(get_fieldvalues(recid, "100__a") + \ get_fieldvalues(recid, "100__u")) book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \ get_fieldvalues(recid, "260__b") + \ get_fieldvalues(recid, "260__c")) book_copies = ' '.join(get_fieldvalues(recid, "964__a")) book_infos = (book_author, book_editor, book_copies) return book_infos def update_request_data(request_id): """ Update the status of a given request. @param request_id: identify the request: Primary key of crcLOANREQUEST @type request_id: int """ barcode = db.get_request_barcode(request_id) nb_requests = db.get_number_requests_per_copy(barcode) is_on_loan = db.is_item_on_loan(barcode) if nb_requests == 0 and is_on_loan is not None: db.update_item_status('on loan', barcode) elif nb_requests == 0 and is_on_loan is None: db.update_item_status('available', barcode) else: db.update_item_status('requested', barcode) return def compare_dates(date): """ Compare given date with today @param date: given date @type date: string @return boolean """ if date < time.strftime("%Y-%m-%d"): return False else: return True def validate_date_format(date): """ Verify the date format @param date: given date @type date: string @return boolean """ try: if time.strptime(date, "%Y-%m-%d"): if compare_dates(date): return True else: return False except ValueError: return False def create_ill_record(book_info): """ Create a new ILL record @param book_info: book's information @type book_info: tuple @return MARC record """ (title, author, place, publisher, year, edition, isbn) = book_info ill_record = """ %(isbn)s %(author)s %(title)s %(edition)s %(place)s %(publisher)s %(year)s ILLBOOK """ % {'isbn': isbn, 'author': author, 'title': title, 'edition': edition, 'place': place, 'publisher': publisher, 'year': year} file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book', time.strftime("%Y%m%d_%H%M%S")) xml_file = open(file_path, 'w') xml_file.write(ill_record) xml_file.close() # Pass XML file to BibUpload. task_low_level_submission('bibupload', 'bibcirculation', '-P', '5', '-i', file_path) return ill_record def wash_recid_from_ILL_request(ill_request_id): """ Get dictionnary and wash recid values. @param ill_request_id: identify the ILL request. Primray key of crcILLREQUEST @type ill_request_id: int @return recid """ book_info = db.get_ill_book_info(ill_request_id) book_info = eval(book_info) try: recid = int(book_info['recid']) except KeyError: recid = None return recid def get_list_of_ILL_requests(): """ Get list with all recids related with ILL requests """ list_of_recids = [] ill_requests = db.get_ill_ids() for i in range(len(ill_requests)): recid = wash_recid_from_ILL_request(ill_requests[i][0]) if recid: list_of_recids.append(recid) return list_of_recids def all_copies_are_missing(recid): """ Verify if all copies of an item are missing @param recid: identify the record. 
Primary key of bibrec @type recid: int @return boolean """ copies_status = db.get_copies_status(recid) number_of_missing = 0 for (status) in copies_status: if status == 'missing': number_of_missing += 1 if number_of_missing == len(copies_status): return True else: return False def has_copies(recid): """ Verify if a recid is item (has copies) @param recid: identify the record. Primary key of bibrec @type recid: int @return boolean """ copies_status = db.get_copies_status(recid) if copies_status is None: return False else: if len(copies_status) == 0: return False else: return True def generate_email_body(template, loan_id): """ Generate the body of an email for loan recalls. @param template: email template @type template: string @param loan_id: identify the loan. Primary key of crcLOAN. @type loan_id: int @return email(body) """ recid = db.get_loan_recid(loan_id) (book_title, book_year, book_author, book_isbn, book_editor) = book_information_from_MARC(int(recid)) out = template % (book_title, book_year, book_author, book_isbn, book_editor) return out def create_item_details_url(recid, ln): """ Generate the URL redirecting to the edition of record copies @param recid: The identifier of the record @type recid: int @param ln: The language identifier @type ln: string @return A string being the URL allowing to edit currently viewed record """ url = '/admin/bibcirculation/bibcirculationadmin.py/get_item_details?ln=%s&recid=%s' % (ln, str(recid)) return CFG_SITE_URL + url diff --git a/modules/bibedit/lib/bibedit_utils.py b/modules/bibedit/lib/bibedit_utils.py index f459cc96c..e66c33f8d 100644 --- a/modules/bibedit/lib/bibedit_utils.py +++ b/modules/bibedit/lib/bibedit_utils.py @@ -1,585 +1,585 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0103 """BibEdit Utilities. This module contains support functions (i.e., those that are not called directly by the web interface), that might be imported by other modules or that is called by both the web and CLI interfaces. 
""" __revision__ = "$Id$" import cPickle import difflib import fnmatch import marshal import os import re import time import zlib from datetime import datetime from invenio.bibedit_config import CFG_BIBEDIT_FILENAME, \ CFG_BIBEDIT_RECORD_TEMPLATES_PATH, CFG_BIBEDIT_TO_MERGE_SUFFIX, \ CFG_BIBEDIT_FIELD_TEMPLATES_PATH from invenio.bibedit_dblayer import get_record_last_modification_date, \ delete_hp_change from invenio.bibrecord import create_record, create_records, \ record_get_field_value, record_has_field, record_xml_output, \ record_strip_empty_fields, record_strip_empty_volatile_subfields, \ record_order_subfields from invenio.bibtask import task_low_level_submission from invenio.config import CFG_BIBEDIT_LOCKLEVEL, \ CFG_BIBEDIT_TIMEOUT, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG as OAIID_TAG, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG as SYSNO_TAG, CFG_TMPSHAREDDIR from invenio.dateutils import convert_datetext_to_dategui from invenio.bibedit_dblayer import get_bibupload_task_opts, \ get_marcxml_of_record_revision, get_record_revisions, \ get_info_of_record_revision -from invenio.search_engine import get_fieldvalues, print_record, \ - record_exists, get_colID, guess_primary_collection_of_a_record, \ - get_record +from invenio.search_engine import print_record, record_exists, get_colID, \ + guess_primary_collection_of_a_record, get_record +from invenio.search_engine_utils import get_fieldvalues from invenio.webuser import get_user_info from invenio.dbquery import run_sql from invenio.websearchadminlib import get_detailed_page_tabs # Precompile regexp: re_file_option = re.compile(r'^%s' % CFG_TMPSHAREDDIR) re_xmlfilename_suffix = re.compile('_(\d+)_\d+\.xml$') re_revid_split = re.compile('^(\d+)\.(\d{14})$') re_revdate_split = re.compile('^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)') re_taskid = re.compile('ID="(\d+)"') re_tmpl_name = re.compile('') re_tmpl_description = re.compile('') re_ftmpl_name = re.compile('') re_ftmpl_description = re.compile('') # Helper functions def assert_undo_redo_lists_correctness(undo_list, redo_list): for undoItem in undo_list: assert undoItem != None; for redoItem in redo_list: assert redoItem != None; # Operations on the BibEdit cache file def cache_exists(recid, uid): """Check if the BibEdit cache file exists.""" return os.path.isfile('%s.tmp' % _get_file_path(recid, uid)) def get_cache_mtime(recid, uid): """Get the last modified time of the BibEdit cache file. Check that the cache exists before calling this function. """ try: return int(os.path.getmtime('%s.tmp' % _get_file_path(recid, uid))) except OSError: pass def cache_expired(recid, uid): """Has it been longer than the number of seconds given by CFG_BIBEDIT_TIMEOUT since last cache update? Check that the cache exists before calling this function. """ return get_cache_mtime(recid, uid) < int(time.time()) - CFG_BIBEDIT_TIMEOUT def create_cache_file(recid, uid, record='', cache_dirty=False, pending_changes=[], disabled_hp_changes = {}, undo_list = [], redo_list=[]): """Create a BibEdit cache file, and return revision and record. This will overwrite any existing cache the user has for this record. datetime. 
""" if not record: record = get_bibrecord(recid) # Order subfields alphabetically after loading the record record_order_subfields(record) if not record: return file_path = '%s.tmp' % _get_file_path(recid, uid) record_revision = get_record_last_modification_date(recid) if record_revision == None: record_revision = datetime.now().timetuple() cache_file = open(file_path, 'w') assert_undo_redo_lists_correctness(undo_list, redo_list); cPickle.dump([cache_dirty, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list], cache_file) cache_file.close() return record_revision, record def touch_cache_file(recid, uid): """Touch a BibEdit cache file. This should be used to indicate that the user has again accessed the record, so that locking will work correctly. """ if cache_exists(recid, uid): os.system('touch %s.tmp' % _get_file_path(recid, uid)) def get_bibrecord(recid): """Return record in BibRecord wrapping.""" if record_exists(recid): return create_record(print_record(recid, 'xm'))[0] def get_cache_file_contents(recid, uid): """Return the contents of a BibEdit cache file.""" cache_file = _get_cache_file(recid, uid, 'r') if cache_file: cache_dirty, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list = cPickle.load(cache_file) cache_file.close() assert_undo_redo_lists_correctness(undo_list, redo_list); return cache_dirty, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list def update_cache_file_contents(recid, uid, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list): """Save updates to the record in BibEdit cache. Return file modificaton time. """ cache_file = _get_cache_file(recid, uid, 'w') if cache_file: assert_undo_redo_lists_correctness(undo_list, redo_list); cPickle.dump([True, record_revision, record, pending_changes, disabled_hp_changes, undo_list, redo_list], cache_file) cache_file.close() return get_cache_mtime(recid, uid) def delete_cache_file(recid, uid): """Delete a BibEdit cache file.""" os.remove('%s.tmp' % _get_file_path(recid, uid)) def delete_disabled_changes(used_changes): for change_id in used_changes: delete_hp_change(change_id) def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False): """Write XML record to file. Default behaviour is to read the record from a BibEdit cache file, filter out the unchanged volatile subfields, write it back to an XML file and then pass this file to BibUpload. @param xml_record: give XML as string in stead of reading cache file @param to_upload: pass the XML file to BibUpload @param to_merge: prepare an XML file for BibMerge to use """ if not xml_record: # Read record from cache file. cache = get_cache_file_contents(recid, uid) if cache: record = cache[2] used_changes = cache[4] # record_strip_empty_fields(record) # now performed for every record after removing unfilled volatile fields xml_record = record_xml_output(record) delete_cache_file(recid, uid) delete_disabled_changes(used_changes) else: record = create_record(xml_record)[0] # clean the record from unfilled volatile fields record_strip_empty_volatile_subfields(record) record_strip_empty_fields(record) # order subfields alphabetically before saving the record record_order_subfields(record) xml_to_write = record_xml_output(record) # Write XML file. 
if not to_merge: file_path = '%s.xml' % _get_file_path(recid, uid) else: file_path = '%s_%s.xml' % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX) xml_file = open(file_path, 'w') xml_file.write(xml_to_write) xml_file.close() user_name = get_user_info(uid)[1] if to_upload: # Pass XML file to BibUpload. task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r', file_path, '-u', user_name) return True # Security: Locking and integrity def latest_record_revision(recid, revision_time): """Check if timetuple REVISION_TIME matches latest modification date.""" latest = get_record_last_modification_date(recid) # this can be none if the record is new return (latest == None) or (revision_time == latest) def record_locked_by_other_user(recid, uid): """Return true if any other user than UID has active caches for record RECID. """ active_uids = _uids_with_active_caches(recid) try: active_uids.remove(uid) except ValueError: pass return bool(active_uids) def record_locked_by_queue(recid): """Check if record should be locked for editing because of the current state of the BibUpload queue. The level of checking is based on CFG_BIBEDIT_LOCKLEVEL. """ # Check for *any* scheduled bibupload tasks. if CFG_BIBEDIT_LOCKLEVEL == 2: return _get_bibupload_task_ids() filenames = _get_bibupload_filenames() # Check for match between name of XML-files and record. # Assumes that filename ends with _.xml. if CFG_BIBEDIT_LOCKLEVEL == 1: recids = [] for filename in filenames: filename_suffix = re_xmlfilename_suffix.search(filename) if filename_suffix: recids.append(int(filename_suffix.group(1))) return recid in recids # Check for match between content of files and record. if CFG_BIBEDIT_LOCKLEVEL == 3: while True: lock = _record_in_files_p(recid, filenames) # Check if any new files were added while we were searching if not lock: filenames_updated = _get_bibupload_filenames() for filename in filenames_updated: if not filename in filenames: break else: return lock else: return lock # JSON def json_unicode_to_utf8(data): """Change all strings in a JSON structure to UTF-8.""" if type(data) == unicode: return data.encode('utf-8') elif type(data) == dict: newdict = {} for key in data: newdict[json_unicode_to_utf8(key)] = json_unicode_to_utf8(data[key]) return newdict elif type(data) == list: return [json_unicode_to_utf8(elem) for elem in data] else: return data # History/revisions def revision_to_timestamp(td): """ Converts the revision date to the timestamp """ return "%04i%02i%02i%02i%02i%02i" % (td.tm_year, td.tm_mon, td.tm_mday, \ td.tm_hour, td.tm_min, td.tm_sec) def timestamp_to_revision(timestamp): """ Converts the timestamp to a correct revision date """ year = int(timestamp[0:4]) month = int(timestamp[4:6]) day = int(timestamp[6:8]) hour = int(timestamp[8:10]) minute = int(timestamp[10:12]) second = int(timestamp[12:14]) return datetime(year, month, day, hour, minute, second).timetuple() def get_record_revision_timestamps(recid): """return list of timestamps describing teh revisions of a given record""" rev_ids = get_record_revision_ids(recid) result = [] for rev_id in rev_ids: result.append(rev_id.split(".")[1]) return result def get_record_revision_ids(recid): """Return list of all record revision IDs. Return revision IDs in chronologically decreasing order (latest first). """ res = [] tmp_res = get_record_revisions(recid) for row in tmp_res: res.append('%s.%s' % (row[0], row[1])) return res def get_marcxml_of_revision(recid, revid): """Return MARCXML string of revision. 
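A record revision ID has the shape RECID.REVDATE, where REVDATE is the 14-digit timestamp produced by revision_to_timestamp() above. A self-contained round-trip sketch of the two conversions, with the same formats re-inlined here:

from datetime import datetime

rev = datetime(2011, 3, 2, 14, 30, 15).timetuple()
stamp = "%04i%02i%02i%02i%02i%02i" % (rev.tm_year, rev.tm_mon, rev.tm_mday,
                                      rev.tm_hour, rev.tm_min, rev.tm_sec)
assert stamp == '20110302143015'  # the part after 'recid.' in a revision ID
back = datetime(int(stamp[0:4]), int(stamp[4:6]), int(stamp[6:8]),
                int(stamp[8:10]), int(stamp[10:12]),
                int(stamp[12:14])).timetuple()
assert back == rev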
Return empty string if revision does not exist. REVID should be a string. """ res = '' tmp_res = get_marcxml_of_record_revision(recid, revid) if tmp_res: for row in tmp_res: res += zlib.decompress(row[0]) + '\n' return res; def get_marcxml_of_revision_id(revid): """Return MARCXML string of revision. Return empty string if revision does not exist. REVID should be a string. """ recid, job_date = split_revid(revid, 'datetext') return get_marcxml_of_revision(recid, job_date); def get_info_of_revision_id(revid): """Return info string regarding revision. Return empty string if revision does not exist. REVID should be a string. """ recid, job_date = split_revid(revid, 'datetext') res = '' tmp_res = get_info_of_record_revision(recid, job_date) if tmp_res: task_id = str(tmp_res[0][0]) author = tmp_res[0][1] if not author: author = 'N/A' res += '%s%s%s' % (revid.ljust(22), task_id.ljust(15), author.ljust(15)) job_details = tmp_res[0][2].split() upload_mode = job_details[0] + job_details[1][:-1] upload_file = job_details[2] + job_details[3][:-1] res += '%s %s' % (upload_mode, upload_file) return res def revision_format_valid_p(revid): """Test validity of revision ID format (=RECID.REVDATE).""" if re_revid_split.match(revid): return True return False def record_revision_exists(recid, revid): results = get_record_revisions(recid) for res in results: if res[1] == revid: return True return False def split_revid(revid, dateformat=''): """Split revid and return tuple (recid, revdate). Optional dateformat can be datetext or dategui. """ recid, revdate = re_revid_split.search(revid).groups() if dateformat: datetext = '%s-%s-%s %s:%s:%s' % re_revdate_split.search( revdate).groups() if dateformat == 'datetext': revdate = datetext elif dateformat == 'dategui': revdate = convert_datetext_to_dategui(datetext, secs=True) return recid, revdate def get_xml_comparison(header1, header2, xml1, xml2): """Return diff of two MARCXML records.""" return ''.join(difflib.unified_diff(xml1.splitlines(1), xml2.splitlines(1), header1, header2)) #Templates def get_templates(templatesDir, tmpl_name, tmpl_description, extractContent = False): """Return list of templates [filename, name, description, content*] the extractContent variable indicated if the parsed content should be included""" template_fnames = fnmatch.filter(os.listdir( templatesDir), '*.xml') templates = [] for fname in template_fnames: template_file = open('%s%s%s' % ( templatesDir, os.sep, fname),'r') template = template_file.read() template_file.close() fname_stripped = os.path.splitext(fname)[0] mo_name = tmpl_name.search(template) mo_description = tmpl_description.search(template) if mo_name: name = mo_name.group(1) else: name = fname_stripped if mo_description: description = mo_description.group(1) else: description = '' if (extractContent): parsedTemplate = create_record(template)[0] if parsedTemplate != None: # If the template was correct templates.append([fname_stripped, name, description, parsedTemplate]) else: raise "Problem when parsing the template %s" % (fname, ) else: templates.append([fname_stripped, name, description]) return templates # Field templates def get_field_templates(): """Returns list of field templates [filename, name, description, content]""" return get_templates(CFG_BIBEDIT_FIELD_TEMPLATES_PATH, re_ftmpl_name, re_ftmpl_description, True) # Record templates def get_record_templates(): """Return list of record template [filename, name, description] .""" return get_templates(CFG_BIBEDIT_RECORD_TEMPLATES_PATH, re_tmpl_name, 
re_tmpl_description, False) def get_record_template(name): """Return an XML record template.""" filepath = '%s%s%s.xml' % (CFG_BIBEDIT_RECORD_TEMPLATES_PATH, os.sep, name) if os.path.isfile(filepath): template_file = open(filepath, 'r') template = template_file.read() template_file.close() return template # Private functions def _get_cache_file(recid, uid, mode): """Return a BibEdit cache file object.""" if cache_exists(recid, uid): return open('%s.tmp' % _get_file_path(recid, uid), mode) def _get_file_path(recid, uid, filename=''): """Return the file path to a BibEdit file (excluding suffix). If filename is specified this replaces the config default. """ if not filename: return '%s%s%s_%s_%s' % (CFG_TMPSHAREDDIR, os.sep, CFG_BIBEDIT_FILENAME, recid, uid) else: return '%s%s%s_%s_%s' % (CFG_TMPSHAREDDIR, os.sep, filename, recid, uid) def _uids_with_active_caches(recid): """Return list of uids with active caches for record RECID. Active caches are caches that have been modified a number of seconds ago that is less than the one given by CFG_BIBEDIT_TIMEOUT. """ re_tmpfilename = re.compile('%s_%s_(\d+)\.tmp' % (CFG_BIBEDIT_FILENAME, recid)) tmpfiles = fnmatch.filter(os.listdir(CFG_TMPSHAREDDIR), '%s*.tmp' % CFG_BIBEDIT_FILENAME) expire_time = int(time.time()) - CFG_BIBEDIT_TIMEOUT active_uids = [] for tmpfile in tmpfiles: mo = re_tmpfilename.match(tmpfile) if mo and int(os.path.getmtime('%s%s%s' % ( CFG_TMPSHAREDDIR, os.sep, tmpfile))) > expire_time: active_uids.append(int(mo.group(1))) return active_uids def _get_bibupload_task_ids(): """Return list of all BibUpload task IDs. Ignore tasks submitted by user bibreformat. """ res = run_sql('''SELECT id FROM schTASK WHERE proc LIKE "bibupload%" AND user <> "bibreformat" AND status IN ("WAITING", "SCHEDULED", "RUNNING", "CONTINUING", "ABOUT TO STOP", "ABOUT TO SLEEP", "SLEEPING")''') return [row[0] for row in res] def _get_bibupload_filenames(): """Return paths to all files scheduled for upload.""" task_ids = _get_bibupload_task_ids() filenames = [] tasks_opts = get_bibupload_task_opts(task_ids) for task_opts in tasks_opts: if task_opts: record_options = marshal.loads(task_opts[0][0]) for option in record_options[1:]: if re_file_option.search(option): filenames.append(option) return filenames def _record_in_files_p(recid, filenames): """Search XML files for given record.""" # Get id tags of record in question rec_oaiid = rec_sysno = -1 rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG) if rec_oaiid_tag: rec_oaiid = rec_oaiid_tag[0] rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG) if rec_sysno_tag: rec_sysno = rec_sysno_tag[0] # For each record in each file, compare ids and abort if match is found for filename in filenames: try: file_ = open(filename) records = create_records(file_.read(), 0, 0) for i in range(0, len(records)): record, all_good = records[i][:2] if record and all_good: if _record_has_id_p(record, recid, rec_oaiid, rec_sysno): return True file_.close() except IOError: continue return False def _record_has_id_p(record, recid, rec_oaiid, rec_sysno): """Check if record matches any of the given IDs.""" if record_has_field(record, '001'): if (record_get_field_value(record, '001', '%', '%') == str(recid)): return True if record_has_field(record, OAIID_TAG[0:3]): if (record_get_field_value( record, OAIID_TAG[0:3], OAIID_TAG[3], OAIID_TAG[4], OAIID_TAG[5]) == rec_oaiid): return True if record_has_field(record, SYSNO_TAG[0:3]): if (record_get_field_value( record, SYSNO_TAG[0:3], SYSNO_TAG[3], SYSNO_TAG[4], SYSNO_TAG[5]) == rec_sysno): return 
True return False def can_record_have_physical_copies(recid): """Determine if the record can have physical copies (addable through the bibCirculation module). The information is derieved using the tabs displayed for a given record. Only records already saved within the collection may have the physical copies @return: True or False """ if get_record(recid) == None: return False col_id = get_colID(guess_primary_collection_of_a_record(recid)) collections = get_detailed_page_tabs(col_id, recid) if (not collections.has_key("holdings")) or \ (not collections["holdings"].has_key("visible")): return False return collections["holdings"]["visible"] == True diff --git a/modules/bibformat/lib/bibformat_dblayer.py b/modules/bibformat/lib/bibformat_dblayer.py index c72bf1798..d7aaa2dae 100644 --- a/modules/bibformat/lib/bibformat_dblayer.py +++ b/modules/bibformat/lib/bibformat_dblayer.py @@ -1,538 +1,519 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Database access related functions for BibFormat engine and administration pages. """ __revision__ = "$Id$" import zlib import time from invenio.dbquery import run_sql - -## MARC-21 tag/field access functions -def get_fieldvalues(recID, tag): - """ - Returns list of values of the MARC-21 'tag' fields for the record - 'recID'. - - @param recID: record ID to retrieve value from - @param tag: tag to consider - @return: a list of values matching X{tag} in record X{recID} - """ - out = [] - bibXXx = "bib" + tag[0] + tag[1] + "x" - bibrec_bibXXx = "bibrec_" + bibXXx - query = "SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id AND tag LIKE '%s'" \ - % (bibXXx, bibrec_bibXXx, recID, tag) - res = run_sql(query) - for row in res: - out.append(row[0]) - return out +from invenio.search_engine_utils import get_fieldvalues def localtime_to_utc(date, fmt="%Y-%m-%dT%H:%M:%SZ"): """ Convert localtime to UTC @param date: the date to convert to UTC @type date: string @param fmt: the output format for the returned date @return: a UTC version of input X{date} @rtype: string """ ldate = date.split(" ")[0] ltime = date.split(" ")[1] lhour = ltime.split(":")[0] lminute = ltime.split(":")[1] lsec = ltime.split(":")[2] lyear = ldate.split("-")[0] lmonth = ldate.split("-")[1] lday = ldate.split("-")[2] timetoconvert = time.strftime(fmt, time.gmtime(time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, -1)))) return timetoconvert def get_creation_date(sysno, fmt="%Y-%m-%dT%H:%M:%SZ"): """ Returns the creation date of the record 'sysno'. 
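The field-by-field string splitting in localtime_to_utc() above can be read as one strptime/mktime/gmtime chain; a condensed sketch of the same conversion (not the module's code, and the result shifts with the host's timezone):

import time

def local_to_utc_sketch(date, fmt="%Y-%m-%dT%H:%M:%SZ"):
    # local 'YYYY-MM-DD HH:MM:SS' -> epoch seconds -> UTC string
    parsed = time.strptime(date, "%Y-%m-%d %H:%M:%S")
    return time.strftime(fmt, time.gmtime(time.mktime(parsed)))

print local_to_utc_sketch("2011-03-02 14:30:00")  # '2011-03-02T13:30:00Z' on a CET host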
@param sysno: the record ID for which we want to retrieve creation date @param fmt: output format for the returned date @return: creation date of the record @rtype: string """ out = "" res = run_sql("SELECT DATE_FORMAT(creation_date, '%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) if res[0][0]: out = localtime_to_utc(res[0][0], fmt) return out def get_modification_date(sysno, fmt="%Y-%m-%dT%H:%M:%SZ"): """ Returns the date of last modification for the record 'sysno'. @param sysno: the record ID for which we want to retrieve modification date @param fmt: output format for the returned date @return: modification date of the record @rtype: string """ out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (sysno,), 1) if res and res[0][0]: out = localtime_to_utc(res[0][0], fmt) return out ## XML Marc related functions def get_tag_from_name(name): """ Returns the marc code corresponding the given name @param name: name for which we want to retrieve the tag @return: a tag corresponding to X{name} or None if not found """ res = run_sql("SELECT value FROM tag WHERE name LIKE %s", (name,)) if len(res)>0: return res[0][0] else: return None def get_tags_from_name(name): """ Returns the marc codes corresponding the given name, ordered by value @param name: name for which we want to retrieve the tags @return: list of tags corresponding to X{name} or None if not found """ res = run_sql("SELECT value FROM tag WHERE name LIKE %s ORDER BY value", (name,)) if len(res)>0: return list(res[0]) else: return None def tag_exists_for_name(name): """ Returns True if a tag exists for name in 'tag' table. @param name: name for which we want to check if a tag exist @return: True if a tag exist for X{name} or False """ rows = run_sql("SELECT value FROM tag WHERE name LIKE %s", (name,)) if len(rows) > 0: return True return False def get_name_from_tag(tag): """ Returns the name corresponding to a marc code @param tag: tag to consider @return: a name corresponding to X{tag} """ res = run_sql("SELECT name FROM tag WHERE value LIKE %s", (tag,)) if len(res)>0: return res[0][0] else: return None def name_exists_for_tag(tag): """ Returns True if a name exists for tag in 'tag' table. @param tag: tag for which we want to check if a name exist @return: True if a name exist for X{tag} or False """ rows = run_sql("SELECT name FROM tag WHERE value LIKE %s", (tag,)) if len(rows) > 0: return True return False def get_all_name_tag_mappings(): """ Return the list of mappings name<->tag from 'tag' table. The returned object is a dict with name as key (if 2 names are the same we will take the value of one of them, as we cannot make the difference in format templates) @return: a dict containing list of mapping in 'tag' table """ out = {} query = "SELECT value, name FROM tag" res = run_sql(query) for row in res: out[row[1]] = row[0] return out ## Output formats related functions def get_output_format_id(code): """ Returns the id of output format given by code in the database. Output formats are located inside 'format' table @param code: the code of an output format @return: the id in the database of the output format. None if not found """ f_code = code if len(code)>6: f_code = code[:6] res = run_sql("SELECT id FROM format WHERE code=%s", (f_code.lower(),)) if len(res)>0: return res[0][0] else: return None def add_output_format(code, name="", description="", content_type="text/html", visibility=1): """ Add output format into format table. 
If format with given code already exists, do nothing @param code: the code of the new format @param name: a new for the new format @param description: a description for the new format @param content_type: the content_type (if applicable) of the new output format @param visibility: if the output format is shown to users (1) or not (0) @return: None """ output_format_id = get_output_format_id(code); if output_format_id is None: query = "INSERT INTO format SET code=%s, description=%s, content_type=%s, visibility=%s" params = (code.lower(), description, content_type, visibility) run_sql(query, params) set_output_format_name(code, name) def remove_output_format(code): """ Removes the output format with 'code' If code does not exist in database, do nothing The function also removes all localized names in formatname table @param code: the code of the output format to remove @return: None """ output_format_id = get_output_format_id(code); if output_format_id is None: return query = "DELETE FROM formatname WHERE id_format='%s'" % output_format_id run_sql(query) query = "DELETE FROM format WHERE id='%s'" % output_format_id run_sql(query) def get_output_format_description(code): """ Returns the description of the output format given by code If code or description does not exist, return empty string @param code: the code of the output format to get the description from @return: output format description """ res = run_sql("SELECT description FROM format WHERE code=%s", (code,)) if len(res) > 0: res = res[0][0] if res is not None: return res return "" def set_output_format_description(code, description): """ Sets the description of an output format, given by its code If 'code' does not exist, create format @param code: the code of the output format to update @param description: the new description @return: None """ output_format_id = get_output_format_id(code) if output_format_id is None: add_output_format(code, "", description) query = "UPDATE format SET description=%s WHERE code=%s" params = (description, code.lower()) run_sql(query, params) def get_output_format_visibility(code): """ Returns the visibility of the output format, given by its code If code does not exist, return 0 @param code: the code of an output format @return: output format visibility (0 if not visible, 1 if visible """ res = run_sql("SELECT visibility FROM format WHERE code=%s", (code,)) if len(res) > 0: res = res[0][0] if res is not None and int(res) in range(0, 2): return int(res) return 0 def set_output_format_visibility(code, visibility): """ Sets the visibility of an output format, given by its code If 'code' does not exist, create format @param code: the code of the output format to update @param visibility: the new visibility (0: not visible, 1:visible) @return: None """ output_format_id = get_output_format_id(code) if output_format_id is None: add_output_format(code, "", "", "", visibility) query = "UPDATE format SET visibility=%s WHERE code=%s" params = (visibility, code.lower()) run_sql(query, params) def get_existing_content_types(): """ Returns the list of all MIME content-types used in existing output formats. 
Always returns at least a list with 'text/html' @return: a list of content-type strings """ query = "SELECT DISTINCT content_type FROM format GROUP BY content_type" res = run_sql(query) if res is not None: res = [val[0] for val in res if len(val) > 0] if not 'text/html' in res: res.append('text/html') return res else: return ['text/html'] def get_output_format_content_type(code): """ Returns the content_type of the output format given by code If code or content_type does not exist, return empty string @param code: the code of the output format to get the description from @return: output format content_type """ res = run_sql("SELECT content_type FROM format WHERE code=%s", (code,)) if len(res) > 0: res = res[0][0] if res is not None: return res return "" def set_output_format_content_type(code, content_type): """ Sets the content_type of an output format, given by its code If 'code' does not exist, create format @param code: the code of the output format to update @param content_type: the content type for the format @return: None """ output_format_id = get_output_format_id(code) if output_format_id is None: # add one if not exist (should not happen) add_output_format(code, "", "", content_type) query = "UPDATE format SET content_type=%s WHERE code=%s" params = (content_type, code.lower()) run_sql(query, params) def get_output_format_names(code): """ Returns the localized names of the output format designated by 'code' The returned object is a dict with keys 'ln' (for long name) and 'sn' (for short name), containing each a dictionary with languages as keys. The key 'generic' contains the generic name of the output format (for use in admin interface) For eg:: {'ln':{'en': "a long name", 'fr': "un long nom", 'de': "ein lange Name"}, 'sn':{'en': "a name", 'fr': "un nom", 'de': "ein Name"} 'generic': "a name"} The returned dictionary is never None. The keys 'ln' and 'sn' are always present. However only languages present in the database are in dicts 'sn' and 'ln'. language "CFG_SITE_LANG" is always in dict. The localized names of output formats are located in formatname table. @param code: the code of the output format to get the names from @return: a dict containing output format names """ out = {'sn':{}, 'ln':{}, 'generic':''} output_format_id = get_output_format_id(code); if output_format_id is None: return out res = run_sql("SELECT name FROM format WHERE code=%s", (code,)) if len(res) > 0: out['generic'] = res[0][0] query = "SELECT type, ln, value FROM formatname WHERE id_format='%s'" % output_format_id res = run_sql(query) for row in res: if row[0] == 'sn' or row[0] == 'ln': out[row[0]][row[1]] = row[2] return out def set_output_format_name(code, name, lang="generic", type='ln'): """ Sets the name of an output format given by code. if 'type' different from 'ln' or 'sn', do nothing if 'name' exceeds 256 chars, 'name' is truncated to first 256 chars. if 'code' does not correspond to exisiting output format, create format if "generic" is given as lang The localized names of output formats are located in formatname table. 
@param code: the code of an ouput format @param type: either 'ln' (for long name) and 'sn' (for short name) @param lang: the language in which the name is given @param name: the name to give to the output format @return: None """ if len(name) > 256: name = name[:256] if type.lower() != "sn" and type.lower() != "ln": return output_format_id = get_output_format_id(code); if output_format_id is None and lang == "generic" and type.lower() == "ln": # Create output format inside table if it did not exist # Happens when the output format was added not through web interface add_output_format(code, name) output_format_id = get_output_format_id(code) # Reload id, because it was not found previously if lang =="generic" and type.lower()=="ln": # Save inside format table for main name query = "UPDATE format SET name=%s WHERE code=%s" params = (name, code.lower()) run_sql(query, params) else: # Save inside formatname table for name variations run_sql("REPLACE INTO formatname SET id_format=%s, ln=%s, type=%s, value=%s", (output_format_id, lang, type.lower(), name)) def change_output_format_code(old_code, new_code): """ Change the code of an output format @param old_code: the code of the output format to change @param new_code: the new code @return: None """ output_format_id = get_output_format_id(old_code); if output_format_id is None: return query = "UPDATE format SET code=%s WHERE id=%s" params = (new_code.lower(), output_format_id) res = run_sql(query, params) def get_preformatted_record(recID, of, decompress=zlib.decompress): """ Returns the preformatted record with id 'recID' and format 'of' If corresponding record does not exist for given output format, returns None @param recID: the id of the record to fetch @param of: the output format code @param decompress: the method used to decompress the preformatted record in database @return: formatted record as String, or None if not exist """ # Try to fetch preformatted record query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" params = (recID, of) res = run_sql(query, params) if res: # record 'recID' is formatted in 'of', so return it return "%s" % decompress(res[0][0]) else: return None def get_preformatted_record_date(recID, of): """ Returns the date of the last update of the cache for the considered preformatted record in bibfmt If corresponding record does not exist for given output format, returns None @param recID: the id of the record to fetch @param of: the output format code @return: the date of the last update of the cache, or None if not exist """ # Try to fetch preformatted record query = "SELECT last_updated FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, of) res = run_sql(query) if res: # record 'recID' is formatted in 'of', so return it return "%s" % res[0][0] else: return None ## def keep_formats_in_db(output_formats): ## """ ## Remove from db formats that are not in the list ## TOBE USED ONLY ONCE OLD BIBFORMAT IS REMOVED (or old behaviours will be erased...) 
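The bibfmt cache read by get_preformatted_record() above stores each formatted record zlib-compressed; a minimal sketch of that compress/decompress cycle, independent of the database:

import zlib

xml = '<record><controlfield tag="001">123</controlfield></record>'
blob = zlib.compress(xml)              # what an uploader would store in bibfmt.value
assert zlib.decompress(blob) == xml    # what get_preformatted_record() hands back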
## """ ## query = "SELECT code FROM format" ## res = run_sql(query) ## for row in res: ## if not row[0] in output_formats: ## query = "DELETE FROM format WHERE code='%s'"%row[0] ## def add_formats_in_db(output_formats): ## """ ## Add given formats in db (if not already there) ## """ ## for output_format in output_format: ## if get_format_from_db(output_format) is None: ## #Add new ## query = "UPDATE TABLE format " ## else: ## #Update ## query = "UPDATE TABLE format " ## query = "UPDATE TABLE format " ## res = run_sql(query) ## for row in res: ## if not row[0] in output_formats: ## query = "DELETE FROM format WHERE code='%s'"%row[0] diff --git a/modules/bibformat/lib/bibformat_utils.py b/modules/bibformat/lib/bibformat_utils.py index 874d69916..c236afdd3 100644 --- a/modules/bibformat/lib/bibformat_utils.py +++ b/modules/bibformat/lib/bibformat_utils.py @@ -1,838 +1,821 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Utilities for special formatting of records. API functions: highlight, get_contextual_content, encode_for_xml Used mainly by BibFormat elements. """ __revision__ = "$Id$" import re import zlib import shlex from subprocess import * from invenio.config import \ CFG_OAI_ID_FIELD, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS, \ CFG_INSPIRE_SITE from invenio.dbquery import run_sql from invenio.urlutils import string_to_numeric_char_reference from invenio.textutils import encode_for_xml from invenio.shellutils import run_shell_command +from invenio.search_engine_utils import get_fieldvalues def highlight_matches(text, compiled_pattern, \ prefix_tag='', suffix_tag=""): """ Highlight words in 'text' matching the 'compiled_pattern' @param text: the text in which we want to "highlight" parts @param compiled_pattern: the parts to highlight @type compiled_pattern: a compiled regular expression @param prefix_tag: prefix to use before each matching parts @param suffix_tag: suffix to use after each matching parts @return: a version of input X{text} with words matching X{compiled_pattern} surrounded by X{prefix_tag} and X{suffix_tag} """ #Add 'prefix_tag' and 'suffix_tag' before and after 'match' #FIXME decide if non english accentuated char should be desaccentuaded def replace_highlight(match): """ replace match.group() by prefix_tag + match.group() + suffix_tag""" return prefix_tag + match.group() + suffix_tag #Replace and return keywords with prefix+keyword+suffix return compiled_pattern.sub(replace_highlight, text) def highlight(text, keywords=None, \ prefix_tag='', suffix_tag=""): """ Returns text with all words highlighted with given tags (this function places 'prefix_tag' and 'suffix_tag' before and after words from 'keywords' in 'text'). 
for example set prefix_tag='' and suffix_tag="" @param text: the text to modify @param keywords: a list of string @param prefix_tag: prefix to use before each matching parts @param suffix_tag: suffix to use after each matching parts @return: highlighted text """ if not keywords: return text escaped_keywords = [] for k in keywords: escaped_keywords.append(re.escape(k)) #Build a pattern of the kind keyword1 | keyword2 | keyword3 pattern = '|'.join(escaped_keywords) compiled_pattern = re.compile(pattern, re.IGNORECASE) #Replace and return keywords with prefix+keyword+suffix return highlight_matches(text, compiled_pattern, \ prefix_tag, suffix_tag) def get_contextual_content(text, keywords, max_lines=2): """ Returns some lines from a text contextually to the keywords in 'keywords_string' @param text: the text from which we want to get contextual content @param keywords: a list of keyword strings ("the context") @param max_lines: the maximum number of line to return from the record @return: a string """ def grade_line(text_line, keywords): """ Grades a line according to keywords. grade = number of keywords in the line """ grade = 0 for keyword in keywords: grade += text_line.upper().count(keyword.upper()) return grade #Grade each line according to the keywords lines = text.split('.') #print 'lines: ',lines weights = [grade_line(line, keywords) for line in lines] #print 'line weights: ', weights def grade_region(lines_weight): """ Grades a region. A region is a set of consecutive lines. grade = sum of weights of the line composing the region """ grade = 0 for weight in lines_weight: grade += weight return grade if max_lines > 1: region_weights = [] for index_weight in range(len(weights)- max_lines + 1): region_weights.append(grade_region(weights[index_weight:(index_weight+max_lines)])) weights = region_weights #print 'region weights: ',weights #Returns line with maximal weight, and (max_lines - 1) following lines. index_with_highest_weight = 0 highest_weight = 0 i = 0 for weight in weights: if weight > highest_weight: index_with_highest_weight = i highest_weight = weight i += 1 #print 'highest weight', highest_weight if index_with_highest_weight+max_lines > len(lines): return lines[index_with_highest_weight:] else: return lines[index_with_highest_weight:index_with_highest_weight+max_lines] def record_get_xml(recID, format='xm', decompress=zlib.decompress, on_the_fly=False): """ Returns an XML string of the record given by recID. The function builds the XML directly from the database, without using the standard formatting process. 'format' allows to define the flavour of XML: - 'xm' for standard XML - 'marcxml' for MARC XML - 'oai_dc' for OAI Dublin Core - 'xd' for XML Dublin Core If record does not exist, returns empty string. 
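The highlighting helpers above reduce to an escaped, '|'-joined, case-insensitive pattern plus a wrapping re.sub; a self-contained sketch (the <mark> tags are arbitrary stand-ins, not the module's default tags):

import re

keywords = ['quark', 'gluon plasma']
compiled = re.compile('|'.join([re.escape(k) for k in keywords]), re.IGNORECASE)
text = 'Quark matter probes the gluon plasma.'
print compiled.sub(lambda m: '<mark>' + m.group() + '</mark>', text)
# -> <mark>Quark</mark> matter probes the <mark>gluon plasma</mark>.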
If the record is deleted, returns an empty MARCXML (with recid controlfield, OAI ID fields and 980__c=DELETED) @param recID: the id of the record to retrieve @param format: the format to use @param on_the_fly: if False, try to fetch precreated one in database @param decompress: the library to use to decompress cache from DB @return: the xml string of the record """ from invenio.search_engine import record_exists - def get_fieldvalues(recID, tag): - """Return list of field values for field TAG inside record RECID.""" - out = [] - if tag == "001___": - # we have asked for recID that is not stored in bibXXx tables - out.append(str(recID)) - else: - # we are going to look inside bibXXx tables - digit = tag[0:2] - bx = "bib%sx" % digit - bibx = "bibrec_bib%sx" % digit - query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \ - "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag) - res = run_sql(query) - for row in res: - out.append(row[0]) - return out - def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out #_ = gettext_set_language(ln) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += "
\n" for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += " %s\n" % identifier out += " %s\n" % get_modification_date(recID) out += "
\n" out += " \n" if format.startswith("xm") or format == "marcxml": res = None if on_the_fly == False: # look for cached format existence: query = """SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'""" % (recID, format) res = run_sql(query, None, 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are # not in "bibfmt" table; so fetch all the data from # "bibXXx" tables: if format == "marcxml": out += """ \n""" out += " %d\n" % int(recID) elif format.startswith("xm"): out += """ \n""" out += " %d\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "%s\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "DELETED\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" % recID res = run_sql(query) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ %s\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. So start at bib01x and # bibrec_bib00x (and set i = 0 at the end of # first loop) for digit1 in range(0, 10): for digit2 in range(i, 10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, recID, str(digit1)+str(digit2)) res = run_sql(query) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_" or ind1 == "": ind1 = " " if ind2 == "_" or ind2 == "": ind2 = " " # print field tag if field_number != field_number_old or \ field[:-1] != field_old[:-1]: if field_number_old != -999: out += """ \n""" out += """ \n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value value = encode_for_xml(value) out += """ %s\n""" % \ (encode_for_xml(field[-1:]), value) # all fields/subfields printed in this run, so close the tag: if field_number_old != -999: out += """ \n""" i = 0 # Next loop should start looking at bib%0 and bibrec_bib00x # we are at the end of printing the record: out += " \n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ \n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " %s\n" % f for f in get_fieldvalues(recID, "100__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " %s\n" % encode_for_xml(f) out += " %s\n" % get_creation_date(recID) out += " \n" # print record closing tags, if needed: if format == "marcxml" or 
format == "oai_dc": out += " \n" out += "
\n" return out def parse_tag(tag): """ Parse a marc code and decompose it in a table with: 0-tag 1-indicator1 2-indicator2 3-subfield The first 3 chars always correspond to tag. The indicators are optional. However they must both be indicated, or both ommitted. If indicators are ommitted or indicated with underscore '_', they mean "No indicator". "No indicator" is also equivalent indicator marked as whitespace. The subfield is optional. It can optionally be preceded by a dot '.' or '$$' or '$' Any of the chars can be replaced by wildcard % THE FUNCTION DOES NOT CHECK WELLFORMNESS OF 'tag' Any empty chars is not considered For example: >> parse_tag('245COc') = ['245', 'C', 'O', 'c'] >> parse_tag('245C_c') = ['245', 'C', '', 'c'] >> parse_tag('245__c') = ['245', '', '', 'c'] >> parse_tag('245__$$c') = ['245', '', '', 'c'] >> parse_tag('245__$c') = ['245', '', '', 'c'] >> parse_tag('245 $c') = ['245', '', '', 'c'] >> parse_tag('245 $$c') = ['245', '', '', 'c'] >> parse_tag('245__.c') = ['245', '', '', 'c'] >> parse_tag('245 .c') = ['245', '', '', 'c'] >> parse_tag('245C_$c') = ['245', 'C', '', 'c'] >> parse_tag('245CO$$c') = ['245', 'C', 'O', 'c'] >> parse_tag('245C_.c') = ['245', 'C', '', 'c'] >> parse_tag('245$c') = ['245', '', '', 'c'] >> parse_tag('245.c') = ['245', '', '', 'c'] >> parse_tag('245$$c') = ['245', '', '', 'c'] >> parse_tag('245__%') = ['245', '', '', ''] >> parse_tag('245__$$%') = ['245', '', '', ''] >> parse_tag('245__$%') = ['245', '', '', ''] >> parse_tag('245 $%') = ['245', '', '', ''] >> parse_tag('245 $$%') = ['245', '', '', ''] >> parse_tag('245$%') = ['245', '', '', ''] >> parse_tag('245.%') = ['245', '', '', ''] >> parse_tag('245$$%') = ['245', '', '', ''] >> parse_tag('2%5$$a') = ['2%5', '', '', 'a'] @param tag: tag to parse @return: a canonical form of the input X{tag} """ p_tag = ['', '', '', ''] # tag, ind1, ind2, code tag = tag.replace(" ", "") # Remove empty characters tag = tag.replace("$", "") # Remove $ characters tag = tag.replace(".", "") # Remove . characters #tag = tag.replace("_", "") # Remove _ characters p_tag[0] = tag[0:3] # tag if len(tag) == 4: p_tag[3] = tag[3] # subfield elif len(tag) == 5: ind1 = tag[3] # indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4] # indicator 2 if ind2 != "_": p_tag[2] = ind2 elif len(tag) == 6: p_tag[3] = tag[5] # subfield ind1 = tag[3] # indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4] # indicator 2 if ind2 != "_": p_tag[2] = ind2 return p_tag def get_all_fieldvalues(recID, tags_in): """ Returns list of values that belong to fields in tags_in for record with given recID. Note that when a partial 'tags_in' is specified (eg. '100__'), the subfields of all corresponding datafields are returned all 'mixed' together. Eg. 
with:: 123 100__ $a Ellis, J $u CERN 123 100__ $a Smith, K >>> get_all_fieldvalues(123, '100__') ['Ellis, J', 'CERN', 'Smith, K'] @param recID: record ID to consider @param tags_in: list of tags got retrieve @return: a list of values corresponding to X{tags_in} found in X{recID} """ out = [] if type(tags_in) is not list: tags_in = [tags_in, ] dict_of_tags_out = {} if not tags_in: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = '%' else: for tag in tags_in: if len(tag) == 0: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = '%' elif len(tag) == 1: for j in range(0, 10): dict_of_tags_out["%s%d%%" % (tag, j)] = '%' elif len(tag) <= 5: dict_of_tags_out["%s%%" % tag] = '%' else: dict_of_tags_out[tag[0:5]] = tag[5:6] tags_out = dict_of_tags_out.keys() tags_out.sort() # search all bibXXx tables as needed: for tag in tags_out: digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for continue bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+dict_of_tags_out[tag])) # go through fields: for row in res: field, value, field_number = row[0], row[1], row[2] out.append(value) return out re_bold_latex = re.compile('\$?\\\\textbf\{(?P.*?)\}\$?') re_emph_latex = re.compile('\$?\\\\emph\{(?P.*?)\}\$?') re_generic_start_latex = re.compile('\$?\\\\begin\{(?P.*?)\}\$?') re_generic_end_latex = re.compile('\$?\\\\end\{(?P.*?)\}\$?') re_verbatim_env_latex = re.compile('\\\\begin\{verbatim.*?\}(?P.*?)\\\\end\{verbatim.*?\}') def latex_to_html(text): """ Do some basic interpretation of LaTeX input. Gives some nice results when used in combination with MathJax. @param text: input "LaTeX" markup to interpret @return: a representation of input LaTeX more suitable for HTML """ # Process verbatim environment first def make_verbatim(match_obj): """Replace all possible special chars by HTML character entities, so that they are not interpreted by further commands""" return '
<pre>' + \
                string_to_numeric_char_reference(match_obj.group('content')) + \
                '</pre>'
    text = re_verbatim_env_latex.sub(make_verbatim, text)
    # Remove trailing "line breaks"
    text = text.strip('\\\\')
    # Process special characters
    text = text.replace("\\%", "%")
    text = text.replace("\\#", "#")
    text = text.replace("\\$", "$")
    text = text.replace("\\&", "&")
    text = text.replace("\\{", "{")
    text = text.replace("\\}", "}")
    text = text.replace("\\_", "_")
    text = text.replace("\\^{} ", "^")
    text = text.replace("\\~{} ", "~")
    text = text.replace("\\textregistered", "®")
    text = text.replace("\\copyright", "©")
    text = text.replace("\\texttrademark", "™ ")
    # Remove commented lines and join lines
    text = '\\\\'.join([line for line in text.split('\\\\') \
                        if not line.lstrip().startswith('%')])
    # Line breaks
    text = text.replace('\\\\', '<br/>')
    # Non-breakable spaces
    text = text.replace('~', '&nbsp;')
    # Styled text
    def make_bold(match_obj):
        "Make the found pattern bold"
        # FIXME: check if it is valid to have this inside a formula
        return '<b>' + match_obj.group('content') + '</b>'
    text = re_bold_latex.sub(make_bold, text)
    def make_emph(match_obj):
        "Make the found pattern emphasized"
        # FIXME: for the moment, remove as it could cause problem in
        # the case it is used in a formula. To be check if it is valid.
        return ' ' + match_obj.group('content') + ''
    text = re_emph_latex.sub(make_emph, text)
    # Lists
    text = text.replace('\\begin{enumerate}', '<ol>')
    text = text.replace('\\end{enumerate}', '</ol>')
    text = text.replace('\\begin{itemize}', '<ul>')
    text = text.replace('\\end{itemize}', '</ul>')
    text = text.replace('\\item', '<li>')
    # Remove remaining non-processed tags
    text = re_generic_start_latex.sub('', text)
    text = re_generic_end_latex.sub('', text)
    return text

def get_pdf_snippets(recID, patterns,
                     nb_words_around=CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS,
                     max_snippets=CFG_WEBSEARCH_FULLTEXT_SNIPPETS):
    """
    Extract text snippets around 'patterns' from the newest PDF file of 'recID'
    The search is case-insensitive.
    The snippets are meant to look like in the results of the popular search
    engine: using " ... " between snippets.
    For empty patterns it returns ""

    @param recID: record ID to consider
    @param patterns: list of patterns to retrieve
    @param nb_words_around: max number of words around the matched pattern
    @param max_snippets: max number of snippets to include
    @return: snippet
    """
    from invenio.bibdocfile import BibRecDocs
    text_path = ""
    text_path_courtesy = ""
    for bd in BibRecDocs(recID).list_bibdocs():
        if bd.get_text():
            text_path = bd.get_text_path()
            text_path_courtesy = bd.get_status()
            if CFG_INSPIRE_SITE and not text_path_courtesy:
                # get courtesy from doctype, since docstatus was empty:
                text_path_courtesy = bd.get_type()
                if text_path_courtesy == 'INSPIRE-PUBLIC':
                    # but ignore 'INSPIRE-PUBLIC' doctype
                    text_path_courtesy = ''
            break # stop at the first good PDF textable file
    if text_path:
        out = get_text_snippets(text_path, patterns, nb_words_around,
                                max_snippets)
        if not out:
            # no hit, so check stemmed versions:
            from invenio.bibindex_engine_stemmer import stem
            stemmed_patterns = [stem(p, 'en') for p in patterns]
            out = get_text_snippets(text_path, stemmed_patterns,
                                    nb_words_around, max_snippets, False)
        if out:
            out_courtesy = ""
            if text_path_courtesy:
                out_courtesy = 'Snippets courtesy of ' + text_path_courtesy + '
    ' return """
    %s%s
    """ % (out_courtesy, out) else: return "" else: return "" def get_text_snippets(textfile_path, patterns, nb_words_around, max_snippets, \ right_boundary = True): """ Extract text snippets around 'patterns' from file found at 'textfile_path' The snippets are meant to look like in the results of the popular search engine: using " ... " between snippets. For empty patterns it returns "" The idea is to first produce big snippets with grep and then narrow them using the cut_out_snippet function. TODO: - distinguish the beginning of sentences and try to make the snippets start there @param textfile_path: path to a text file to extract snippet from @param patterns: list of patterns to retrieve @param nb_words_around: max number of words around the matched pattern @param max_snippets: max number of snippets to include @param right_boundary: match the right word boundary or not @return: snippet """ if len(patterns) == 0: return "" # escape the parenthesis unless there is no space between then (e.g. U(1)) # escaping of spaces is done later - only for grepping . escaped_keywords = [] for w in patterns: #if there are both '(' and ')' in one word we leave them if w.count('(') or w.count(')'): if re.match("\w*\(\w*\)\w*", w): w1 = w.replace('(', '\(') escaped_keywords.append(w1.replace(')', '\)')) else: w1 = w.replace('(', '') escaped_keywords.append(w1.replace(')', '')) else: escaped_keywords.append(w) # the max number of words that the snippets can have for this record words_left = max_snippets * (nb_words_around * 2 + 1) # Assuming that there will be at least one word per line we can produce the # big snippets like this # FIXME: the ligature replacement should be done at the textification time; # then the sed expression can go away here. sed_cmd = "sed \'s/ff/ff/g; s/fi/fi/g; s/fl/fl/g; s/ffi/ffi/g; s/ffl/ffl/g\' " \ + textfile_path grep_args = [str(nb_words_around), str(nb_words_around), str(max_snippets)] grep_cmd = "grep -i -E -A%s -B%s -m%s" for p in escaped_keywords: grep_cmd += " -e \"(\\b|\\s)\"%s" if right_boundary: grep_cmd += "\"(\\b|\\s)\"" # space escaping needed for grepping for phrases grep_args.append(p.replace(' ', '\ ')) sed_call = shlex.split(sed_cmd) grep_call = shlex.split(grep_cmd % tuple(grep_args)) p1 = Popen(sed_call, stdout=PIPE) p2 = Popen(grep_call, stdin=p1.stdout, stdout=PIPE) output = p2.communicate()[0] result = [] big_snippets = output.split("--") # cut the snippets to match the nb_words_around parameter precisely: for s in big_snippets: if words_left > 0: #using patterns instead of escaped_keywords to make phase search work #FIXME: parenthesis are not displayed in snippets (small_snippets, words_left) = cut_out_snippet(s, patterns, \ nb_words_around, words_left, right_boundary) #count words result += small_snippets # combine snippets out = "" count = 0 for snippet in result: if snippet and count < max_snippets: if out: out += "
    " out += "..." + snippet + "..." count += 1 return out def get_tokens(text): """ Tokenize the words in the text @param text: text to extract token from @return: a list of tokens """ b_pattern = '\\W+' tokens = [] comp_p_b = re.compile(b_pattern, re.IGNORECASE | re.UNICODE) previous = 0 for match in comp_p_b.finditer(text): (s, e) = match.span() if previous < s: tokens.append((previous, s)) previous = e return tokens def cut_out_snippet(text, patterns, nb_words_around, max_words, right_boundary = True): """ Cut out one ore more snippets, limits to max_words param. The snippet can include many occurances of the patterns if they are not further appart than 2 * nb_words_around. @param text: the text to process @param patterns: the patterns to match @param nb_words_around: max number of words around the matched pattern @param max_words: maximum number of words in the snippet @param right_boundary: match the right word boundary or not @return: a tuple (list of snippets, max_words (?)) """ #index in 'matches' next_pattern = 0 #get token representing words in text tokens = get_tokens(text) #Build a pattern of the kind keyword1 | keyword2 | keyword3 if right_boundary: pattern = '(\\b)(' + '|'.join(patterns) + ')(\\b)' else: pattern = '(\\b)(' + '|'.join(patterns) + ')' compiled_pattern = re.compile(pattern, re.IGNORECASE | re.UNICODE) matches = [] for x in compiled_pattern.finditer(text): matches.append((x.start(), x.end())) def matches_any(index): """ Is text[index] a beginning of a pattern? """ if next_pattern < len(matches) and tokens[index][0] == matches[next_pattern][0]: return True else: return False snippets = [] snippet = "" last_written_word = -1 i = 0 # next_pattern was set above #FIXME: it would be better to generate all matching snippets and then score them, #choose the most relevant ones. while i < len(tokens) and max_words > 3: (s, e) = tokens[i] # For the last snippet for this record: # make the nb_words_around smaller if required by max_words # to make sure that at least one pattern is included while nb_words_around * 2 + 1 > max_words: nb_words_around -= 1 #can be first or a following pattern in this snippet if matches_any(i): (sm,em) = matches[next_pattern] #move pointer next_pattern += 1 # add part before first or following occurance of a word j = max(last_written_word + 1, i - nb_words_around) snippet += ' ' + text[tokens[j][0]:tokens[i-1][1]] # write the pattern snippet += (" " + text[sm:em]) while tokens[i][1] < em: i += 1 last_written_word = i # write the suffix. If pattern found, break j = 1 while j <= nb_words_around and i + j < len(tokens): if matches_any(i+j): break else: snippet += (" " + text[tokens[i+j][0]:tokens[i+j][1]]) last_written_word = i + j j += 1 i += j else: i += 1 # if the snippet is ready (i.e. we didn't just find a new match) if snippet != "" and i < len(tokens) and not matches_any(i): split_snippet = snippet.split() #temporary rule of a thumb to make sure snippets aren't too long if len(split_snippet) > nb_words_around * 4: snippet = " ".join(split_snippet[0:(nb_words_around * 3)]) max_words -= len(snippet.split()) #potentially new length snippets.append(highlight_matches(snippet, compiled_pattern)) snippet = "" return (snippets, max_words) diff --git a/modules/bibformat/lib/elements/bfe_issn.py b/modules/bibformat/lib/elements/bfe_issn.py index f78a6b2bb..86d282502 100644 --- a/modules/bibformat/lib/elements/bfe_issn.py +++ b/modules/bibformat/lib/elements/bfe_issn.py @@ -1,1083 +1,1082 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. 
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Print ISSN corresponding to given journal name """ __revision__ = "$Id$" import pprint import urllib import sys import re import getopt -from invenio.search_engine import \ - get_fieldvalues, \ - perform_request_search +from invenio.search_engine import perform_request_search +from invenio.search_engine_utils import get_fieldvalues from invenio.config import CFG_CERN_SITE if CFG_CERN_SITE: journal_name_tag = '773__p' else: journal_name_tag = '909C4p' issns = { 'aapps bull.': '0218-2203', 'account. manag. inf. technol.': '0959-8022', 'acm comput. surv.': '0360-0300', 'acm sigplan not.': '0362-1340', 'acm trans. comput. syst.': '0734-2071', 'acm trans. comput.-hum. interact.': '1073-0516', 'acm trans. database syst.': '0362-5915', 'acm trans. graph.': '0730-0301', 'acm trans. inf. syst. secur.': '1094-9224', 'acm trans. internet technol.': '1533-5399', 'acm trans. math. softw.': '0098-3500', 'acm trans. program. lang. syst.': '0164-0925', 'acm trans. storage': '1553-3077', 'acta appl. math.': '1572-9036', 'acta arith.': '1730-6264', 'acta fac. rerum nat. univ. comen.: math.': '0373-8183', 'acta math. appl. sin.': '1618-3932', 'acta math. hung.': '1588-2632', 'acta mech. sin.': '1614-3116', 'acta mech. solida sin.': '1860-2134', 'acta phys. pol. a': '0587-4246', 'ad hoc netw.': '1570-8705', 'adsorption': '1572-8757', 'adv. cement based mater.': '1065-7355', 'adv. colloid interface sci.': '0001-8686', 'adv. compos. mater.': '1568-5519', 'adv. eng. inform.': '1474-0346', 'adv. eng. mater.': '1527-2648', 'adv. geom.': '1615-715X', 'adv. mater.': '1521-4095', 'adv. math.': '0001-8708', 'adv. perform. mater.': '1572-8765', 'adv. powder technol.': '1568-5527', 'adv. robot.': '1568-5535', 'air space eur.': '1290-0958', 'algebr. represent. theory': '1572-9079', 'anal. math.': '0133-3852', 'analog integr. circuits signal process.': '0925-1030', 'angew. chem.': '1521-3757', 'angew. chem. int. ed.': '1521-3773', 'ann. glob. anal. geom.': '0232-704X', 'ann. inst. stat. math.': '0020-3157', 'ann. mat. pura appl.': '1618-1891', 'ann. math. artif. intell.': '1012-2443', 'ann. oper. res.': '1572-9338', 'ann. phys. (san diego)': '0003-4916', 'ann. phys. (weinheim)': '1521-3889', 'ann. pol. math.': '1730-6272', 'ann. sci. ec. norm. sup\xc3\xa9r.': '0012-9593', 'ann. softw. eng.': '1573-7489', 'annu. rev. nucl. part. sci.': '0163-8998', 'appl. compos. mater.': '0929-189X', 'appl. intell.': '0924-669X', 'appl. math.': '0862-7940', 'appl. math. electron. notes': '1607-2510', 'appl. phys.': '0340-3793', 'appl. soft comput.': '1568-4946', 'appl. stoch. models bus. ind.': '1526-4025', 'appl. supercond.': '0964-1807', 'appl. surf. sci.': '0378-5963', 'approx. theory. appl.': '1000-9221', 'arch. mus. inform.': '1573-7500', 'arch. 
sci.': '1573-7519', 'ariadne': '1361-3200', 'artif. intell. rev.': '0269-2821', 'artif. life robot.': '1614-7456', 'asimmetrie': '1827-1383', 'astron. educ. rev.': '1539-1515', 'astron. q.': '0364-9229', 'astrophys. space sci.': '1572-946X', 'astrophys. space sci. trans.': '1810-6536', 'astrophysics': '1573-8191', 'at. data nucl. data tables': '0092-640X', 'at. energy': '1573-8205', 'atom': '0004-7015', 'autom. remote control.': '0005-1179', 'autom. softw. eng.': '0928-8910', 'auton. agents multi-agent syst.': '1387-2532', 'auton. robots': '0929-5593', 'banach cent. publ.': '1730-6299', 'ber. bunsenges. phys. chem.': '0005-9021', 'ber. wiss.gesch.': '1522-2365', 'bioelectromagnetics': '1521-186X', 'biom. j.': '1521-4036', 'biomed. eng.': '0006-3398', 'biophys. chem.': '0301-4622', 'biosens. bioelecton.': '0956-5663', 'bol. asoc. math. venez.': '1315-4125', 'br. j. appl. phys.': '0508-3443', 'bt technol. j.': '1358-3948', 'bulg. j. phys.': '1310-0157', 'bull. earthq. eng.': '1573-1456', 'bull. soc. math. fr.': '0037-9484', 'bull. union phys.': '0366-3878', 'bus. strategy environ.': '1099-0836', 'c. r. acad. sci., 2b': '1620-7742', 'c. r. mech.': '1631-0721', 'c. r. phys.': '1631-0705', 'can. j. electr. comput. eng.': '0840-8688', 'cas. pest. mat.': '0862-7959', 'catal. lett.': '1572-879X', 'celest. mech. dyn. astron.': '1572-9478', 'chem. mater.': '1520-5002', 'chem. vap. depos.': '1521-3862', 'chemphyschem': '1439-7641', 'chin. astron.': '0146-6364', 'chin. librariansh.': '1089-4667', 'chin. opt. lett.': '1671-7694', 'chin. phys.': '1741-4199', 'chin. phys. lett.': '0256-307X', 'circuit world': '0305-6120', 'circuits syst. signal process.': '1531-5878', 'clean technol. environ. policy': '1618-9558', 'clefs cea': '0298-6248', 'clin. phys. physiol. meas.': '0143-0815', 'cluster comput.': '1386-7857', 'coastal eng.': '0378-3839', 'colloid j.': '1608-3067', 'colloq. math.': '1730-6310', 'comments mod. phys.': '1560-5892', 'commun. acm': '0001-0782', 'commun. nonlinear sci. numer. simul.': '1007-5704', 'commun. pure appl. math.': '1097-0312', 'commun. soc.: dig. news events interest commun. eng.': '0094-5579', 'complexity': '1099-0526', 'compos. math.': '0010-437X', 'composites': '0010-4361', 'comput. archit. news': '0163-5964', 'comput. bull.': '0010-4531', 'comput. chem. eng.': '0098-1354', 'comput. commun. rev.': '0146-4833', 'comput. graph.': '0097-8930', 'comput. humanit.': '1572-8412', 'comput. ind. eng.': '0360-8352', 'comput. integr. manuf. syst.': '0951-5240', 'comput. math. model.': '1046-283X', 'comput. math. organ. theory': '1381-298X', 'comput. netw. isdn syst.': '0169-7552', 'comput. optim. appl.': '0926-6003', 'comput. phys. rep.': '0167-7977', 'comput. soc.': '0095-2737', 'comput. softw.': '0289-6540', 'comput. speech lang.': '0885-2308', 'comput. support. coop. work': '0925-9724', 'comput. vis. image underst.': '1077-3142', 'computer': '0018-9162', 'concurr. comput.: pract. exp.': '1532-0634', 'concurr.: pract. exp.': '1096-9128', 'constraints': '1572-9354', 'contact context': '1547-8890', 'contrib. plasma phys.': '1521-3986', 'cosm. res.': '0010-9525', 'cost eng. j.': '0274-9626', 'cryst. growth des.': '1528-7483', 'cryst. res. technol.': '1521-4079', 'cultiv. interact.': '1471-3225', 'curr. appl. phys.': '1567-1739', 'curr. opin. colloid. interface sci.': '1359-0294', 'cybermetrics': '1137-5019', 'cybern. syst. anal.': '1060-0396', 'czechoslov. j. phys.': '1572-9486', 'czechoslov. math. j.': '0011-4642', 'data base adv. inf. syst.': '0095-0033', 'data min. knowl. 
discov.': '1384-5810', 'data sci. j.': '1683-1470', 'des. autom. embed. syst.': '1572-8080', 'des. codes cryptogr.': '0925-1022', 'des. monomers polym.': '1568-5551', 'differ. equ.': '0012-2116', 'differ. geom.': '1454-511X', 'digit. signal process.': '1051-2004', 'discrete event dyn. syst.': '1573-7594', 'distrib. parallel databases': '0926-8272', 'documentaliste': '0012-4508', 'dokl. phys. chem.': '0012-5016', 'dyn. control': '1573-8450', 'e-polymers': '1618-7229', 'e-streams': '1098-4399', 'earth moon planets': '0167-9295', 'ec compet. policy newsl.': '1025-2266', 'educ. inf. technol.': '1360-2357', 'educ. stud. math.': '1573-0816', 'egypt. j. solids': '1012-5566', 'electrodepos. surf. treat.': '0300-9416', 'electron microsc. rev.': '0892-0354', 'electron. j. comb.': '1027-5487', 'electron. j. theor. phys.': '1729-5254', 'electron. libr.': '0264-0473', 'electron. res. announc. am. math. soc.': '1079-6762', 'electron. trans. artif. intell.': '1403-204X', 'empir. softw. eng.': '1382-3256', 'entropy': '1099-4300', 'environ. qual. manag.': '1520-6483', 'environmetrics': '1099-095X', 'epj a direct': '1435-3725', 'epj c direct': '1435-3725', 'epj e direct': '1435-3725', 'eso astrophys. symp.': '1611-6143', 'ethics inf. technol.': '1572-8439', 'etri j.': '1225-6463', 'eur. environ.': '1099-0976', 'eur. j. solid state inorg. chem.': '0992-4361', 'eur. trans. electr. power': '1546-3109', 'eur. union': '1472-3395', 'eurasip j. wirel. commun. netw.': '1687-1499', 'exergy': '1164-0235', 'exp. astron.': '1572-9508', 'extremes': '1572-915X', 'fire technol.': '1572-8099', 'fluid dyn.': '1573-8507', 'form. methods syst. des.': '1572-8102', 'forschung': '1522-2357', 'fortran forum': '1061-7264', 'fortschr. phys.': '1521-3978', 'found. phys.': '0015-9018', 'found. phys. lett.': '0894-9875', 'free online scholarsh. newsl.': '1535-7848', 'freepint newsl.': '1460-7239', 'frontiers (swindon)': '1460-5600', 'fuel cells': '1615-6854', 'funct. anal. appl.': '0016-2663', 'fundam. math.': '1730-6329', 'fuzzy optim. decis. mak.': '1573-2908', 'gaz. vide': '1638-802X', 'gen. relativ. gravit.': '1572-9532', 'geoinformatica': '1384-6175', 'germ. res.': '1522-2322', 'glass ceram.': '1573-8515', 'gps solut.': '1521-1886', 'graph. models image process.': '1077-3169', 'heat recovery syst. chp': '0890-4332', 'high energy chem.': '1608-3148', 'high energy density phys.': '1574-1818', 'high energy phys. nucl. phys. (beijing)': '0254-3052', 'high temp.': '0018-151X', 'hit j. sci. eng.': '1565-5008', 'icarus': '0019-1035', 'icsti forum': '1018-9580', 'ieee aerosp. electron. syst. mag.': '0885-8985', 'ieee ann. hist. comput.': '1058-6180', 'ieee antennas propag. mag.': '1045-9243', 'ieee antennas wirel. propag. lett.': '1536-1225', 'ieee assp mag.': '0740-7467', 'ieee circuits devices mag.': '8755-3996', 'ieee circuits syst. mag.': '1531-636X', 'ieee commun. lett.': '1089-7798', 'ieee commun. mag.': '0163-6804', 'ieee comput. appl. power': '0895-0156', 'ieee comput. graph. appl.': '0272-1716', 'ieee comput. sci. eng.': '1070-9924', 'ieee concurr.': '1092-3063', 'ieee control syst. mag.': '0272-1708', 'ieee des. test comput.': '0740-7475', 'ieee distrib. syst. online': '1541-4922', 'ieee electr. insul. mag.': '0883-7554', 'ieee electron device lett.': '0741-3106', 'ieee eng. med. biol. mag.': '0739-5175', 'ieee expert mag.': '0885-9000', 'ieee instrum. measur. mag.': '1094-6969', 'ieee intell. syst.': '1541-1672', 'ieee intell. syst. appl.': '1094-7167', 'ieee internet comput.': '1089-7801', 'ieee j. ocean. 
eng.': '0364-9059', 'ieee j. quantum electron.': '0018-9197', 'ieee j. robot. autom.': '0882-4967', 'ieee j. sel. areas commun.': '0733-8716', 'ieee j. sel. top. quantum electron.': '1077-260X', 'ieee j. solid state circuits': '0018-9200', 'ieee lcs': '1045-9235', 'ieee lts': '1055-6877', 'ieee micro': '0272-1732', 'ieee microw. guid. wave lett.': '1051-8207', 'ieee microw. mag.': '1527-3342', 'ieee microw. wirel. compon. lett.': '1531-1309', 'ieee multimed.': '1070-986X', 'ieee netw.': '0890-8044', 'ieee parallel distrib. technol.: syst. appl.': '1063-6552', 'ieee pers. commun.': '1070-9916', 'ieee pervasive comput.': '1536-1268', 'ieee photonics technol. lett.': '1041-1135', 'ieee potentials': '0278-6648', 'ieee power electron. lett.': '1540-7985', 'ieee power energy mag.': '1540-7977', 'ieee power eng. rev.': '0272-1724', 'ieee robot. autom. mag.': '1070-9932', 'ieee secur. priv. mag.': '1540-7993', 'ieee sens. j.': '1530-437X', 'ieee signal process. lett.': '1070-9908', 'ieee signal process. mag.': '1053-5888', 'ieee softw.': '0740-7459', 'ieee spectr.': '0018-9235', 'ieee technol. soc. mag.': '0278-0097', 'ieee trans. acoust. speech signal process.': '0096-3518', 'ieee trans. adv. packag.': '1521-3323', 'ieee trans. aerosp. electron. syst.': '0018-9251', 'ieee trans. antennas propag.': '0018-926X', 'ieee trans. appl. supercond.': '1051-8223', 'ieee trans. audio': '0096-1620', 'ieee trans. audio electroacoust.': '0018-9278', 'ieee trans. audio speech lang. process.': '1558-7916', 'ieee trans. autom. sci. eng.': '1545-5955', 'ieee trans. automat. control': '0018-9286', 'ieee trans. biomed. eng.': '0018-9294', 'ieee trans. broadcast.': '0018-9316', 'ieee trans. circuits syst.': '0098-4094', 'ieee trans. circuits syst. video technol.': '1051-8215', 'ieee trans. circuits syst., i': '1057-7122', 'ieee trans. circuits syst., ii': '1057-7130', 'ieee trans. commun.': '0090-6778', 'ieee trans. compon. hybrids manuf. technol.': '0148-6411', 'ieee trans. compon. packag. manuf. technol. a': '1070-9886', 'ieee trans. compon. packag. manuf. technol. b': '1070-9894', 'ieee trans. compon. packag. manuf. technol. c': '1083-4400', 'ieee trans. compon. packag. technol.': '1521-3331', 'ieee trans. compon. parts': '0097-6601', 'ieee trans. comput.': '0018-9340', 'ieee trans. comput.-aided des. integrat. circuits syst.': '0278-0070', 'ieee trans. consum. electron.': '0098-3063', 'ieee trans. control syst. technol.': '1063-6536', 'ieee trans. dependable secur. comput.': '1545-5971', 'ieee trans. device mater. reliab.': '1530-4388', 'ieee trans. dielectr. electr. insul.': '1070-9878', 'ieee trans. educ.': '0018-9359', 'ieee trans. electr. insul.': '0018-9367', 'ieee trans. electromagn. compat.': '0018-9375', 'ieee trans. electron devices': '0018-9383', 'ieee trans. electron. packag. manuf.': '1521-334X', 'ieee trans. energy convers.': '0885-8969', 'ieee trans. eng. manag.': '0018-9391', 'ieee trans. evol. comput.': '1089-778X', 'ieee trans. fuzzy syst.': '1063-6706', 'ieee trans. geosci. remote sens.': '0196-2892', 'ieee trans. image process.': '1057-7149', 'ieee trans. ind. appl.': '0093-9994', 'ieee trans. ind. electron.': '0278-0046', 'ieee trans. ind. inform.': '1551-3203', 'ieee trans. inf. technol. biomed.': '1089-7771', 'ieee trans. inf. theory': '0018-9448', 'ieee trans. instrum. meas.': '0018-9456', 'ieee trans. intell. transp. syst.': '1524-9050', 'ieee trans. knowl. data eng.': '1041-4347', 'ieee trans. magn.': '0018-9464', 'ieee trans. manuf. technol.': '0046-838X', 'ieee trans. med. 
imaging': '0278-0062', 'ieee trans. microw. theory tech.': '0018-9480', 'ieee trans. mob. comput.': '1536-1233', 'ieee trans. multimed.': '1520-9210', 'ieee trans. nanobiosci.': '1536-1241', 'ieee trans. nanotechnol.': '1536-125X', 'ieee trans. neural netw.': '1045-9227', 'ieee trans. neural syst. rehabil. eng.': '1534-4320', 'ieee trans. nucl. sci.': '0018-9499', 'ieee trans. parallel distrib. syst.': '1045-9219', 'ieee trans. parts hybrids packag.': '0361-1000', 'ieee trans. parts mater. packag.': '0018-9502', 'ieee trans. pattern anal. mach. intell.': '0162-8828', 'ieee trans. plasma sci.': '0093-3813', 'ieee trans. power deliv.': '0885-8977', 'ieee trans. power electron.': '0885-8993', 'ieee trans. power syst.': '0885-8950', 'ieee trans. prod. eng. prod.': '0097-4544', 'ieee trans. prof. commun.': '0361-1434', 'ieee trans. rehabil. eng.': '1063-6528', 'ieee trans. reliab.': '0018-9529', 'ieee trans. robot.': '1552-3098', 'ieee trans. robot. autom.': '1042-296X', 'ieee trans. semicond. manuf.': '0894-6507', 'ieee trans. signal process.': '1053-587X', 'ieee trans. softw. eng.': '0098-5589', 'ieee trans. sonics ultrason.': '0018-9537', 'ieee trans. speech audio process.': '1063-6676', 'ieee trans. syst. man cybern.': '0018-9472', 'ieee trans. syst. man cybern. a': '1083-4427', 'ieee trans. syst. man cybern. b': '1083-4419', 'ieee trans. syst. man cybern. c': '1094-6977', 'ieee trans. ultrason. eng.': '0893-6706', 'ieee trans. ultrason., ferroelectr. freq. control': '0885-3010', 'ieee trans. veh. technol.': '0018-9545', 'ieee trans. very large scale integr. (vlsi) syst.': '1063-8210', 'ieee trans. vis. comput. graph.': '1077-2626', 'ieee trans. wirel. commun.': '1536-1276', 'ieee wirel. commun.': '1536-1284', 'ieee/acm trans. netw.': '1063-6692', 'ieee/asme trans. mechatron.': '1083-4435', 'iii-vs rev.': '0961-1290', 'inf. bull. var. stars': '1587-2440', 'inf. manag.': '0378-7206', 'inf. organ.': '1471-7727', 'inf. process. manag.': '0306-4573', 'inf. res.': '1368-1613', 'inf. retr.': '1386-4564', 'inf. sci. appl.': '1069-0115', 'inf. syst. e-bus. manag.': '1617-9854', 'inf. syst. front.': '1387-3326', 'inf. technol. disabil.': '1073-5727', 'inf. technol. manag.': '1385-951X', 'infeuro': '1027-930X', 'infrared phys.': '0020-0891', 'innov. syst. softw. eng.': '1614-5054', 'innov. teach. learn. inf. comput. sci.': '1473-1707', 'innov. technol. transf.': '1013-6452', 'innov. transf. technol.': '1025-692X', 'inorg. mater.': '1608-3172', 'instrum. exp. tech.': '0020-4412', 'int. appl. mech.': '1573-8582', 'int. insolv. rev.': '1099-1107', 'int. j. appl. electromagn. mech.': '1383-5416', 'int. j. appl. math. comput. sci.': '1641-876X', 'int. j. appl. radiat. isot.': '0020-708X', 'int. j. comput. math. learn.': '1382-3892', 'int. j. comput. vis.': '0920-9429', 'int. j. des. comput.': '1329-7147', 'int. j. electron. commun.': '1434-8411', 'int. j. electron. commun. (aeu)': '1434-8411', 'int. j. fract.': '0376-9429', 'int. j. hum.-comput. stud.': '1071-5819', 'int. j. infrared millim. waves': '1572-9559', 'int. j. intell. syst.': '1098-111X', 'int. j. mass spectrom.': '1387-3806', 'int. j. mass spectrom. ion process.': '0168-1176', 'int. j. mod. phys. d': '0218-2718', 'int. j. mod. phys. e': '0218-3013', 'int. j. parallel program.': '0885-7458', 'int. j. pattern recognit. artif. intell.': '0218-0014', 'int. j. prod. econ.': '0925-5273', 'int. j. radiat. appl. instrum. a': '0883-2889', 'int. j. radiat. appl. instrum. d': '1359-0189', 'int. j. radiat. phys. chem. (1969-76)': '0020-7055', 'int. j. 
radiat. phys. chem., c': '1359-0197', 'int. j. rock mech. min. sci.': '1365-1609', 'int. j. technol. des. educ.': '0957-7572', 'int. j. theor. phys.': '1572-9575', 'int. j. therm. sci.': '1290-0729', 'int. j. thermophys.': '1572-9567', 'int. j. wirel. inf. netw.': '1068-9605', 'intel. artif.': '1137-3601', 'interact. multimed. electron. j. comput. enhanc. learn.': '1525-9102', 'interface sci.': '0927-7056', 'ipn sci.': '1622-5120', 'ire trans. audio': '0096-1981', 'ire trans. autom. control': '0096-199X', 'ire trans. circuit theory': '0098-4094', 'ire trans. compon. parts': '0096-2422', 'ire trans. prod. eng. prod.': '0096-1779', 'ire trans. prod. tech.': '0096-1760', 'ire trans. ultrason. eng.': '0096-1019', 'it archit.': '1557-2145', 'it prof.': '1520-9202', 'itbm-rbm': '1297-9562', 'itbm-rbm news': '1297-9570', 'itnow': '1746-5702', 'j. acm assoc. comput. mach.': '0004-5411', 'j. adhes. sci. technol.': '1568-5616', 'j. algebr. comb.': '0925-9899', 'j. am. soc. inf. sci.': '1097-4571', 'j. am. soc. inf. sci. technol.': '1532-2890', 'j. anal. chem.': '1608-3199', 'j. appl. clin. med. phys.': '1526-9914', 'j. appl. electrochem.': '0021-891X', 'j. appl. mech. tech. phys.': '1573-8620', 'j. appl. spectrosc.': '1573-8647', 'j. artif. intell. res.': '1076-9757', 'j. astrophys. astron.': '0250-6335', 'j. autom. reason.': '0168-7433', 'j. biomater. sci., polym. ed.': '1568-5624', 'j. braz. comput. soc.': '0104-6500', 'j. chem. doc.': '1961-1974', 'j. chem. eng. data': '1520-5134', 'j. chemom.': '1099-128X', 'j. colloid interface sci.': '0021-9797', 'j. comput. aided mater. des.': '0928-1045', 'j. comput. anal. appl.': '1521-1398', 'j. comput. electron.': '1569-8025', 'j. comput. neurosci.': '0929-5313', 'j. comput. phys.': '0021-9991', 'j. comput. sci. technol.': '1860-4749', 'j. comput.- mediat. commun.': '1083-6101', 'j. corros. sci. eng.': '1466-8858', 'j. cosmol. astropart. phys.': '1475-7516', 'j. data sci.': '1683-8602', 'j. des. commun.': '1137-3601', 'j. digit. inf.': '1368-7506', 'j. disp. technol.': '1551-319X', 'j. dyn. control syst.': '1079-2724', 'j. dyn. differ. equ.': '1040-7294', 'j. elast.': '1573-2681', 'j. electroceram.': '1385-3449', 'j. electromagn. waves appl.': '1569-3937', 'j. electron. test.': '0923-8174', 'j. eng. math.': '0022-0833', 'j. eng. phys. thermophys.': '1573-871X', 'j. fluids struct.': '0889-9746', 'j. fourier anal. appl.': '1531-5851', 'j. fusion energy': '1572-9591', 'j. geophys. eng.': '1742-2132', 'j. glob. optim.': '0925-5001', 'j. grid comput.': '1572-9814', 'j. heuristics': '1381-1231', 'j. high energy phys.': '1126-6708', 'j. instrum.': '1748-0221', 'j. intell. inf. syst.': '0925-9902', 'j. intell. manuf.': '1572-8145', 'j. intell. robot. syst.': '1573-0409', 'j. interlibr. loan doc. deliv. electron. reserve': '1072-303X', 'j. jpn. stat. soc.': '1348-6365', 'j. lightwave technol.': '0733-8724', 'j. log. algebr. program.': '1567-8326', 'j. log. lang. inf.': '1572-9583', 'j. low temp. phys.': '1573-7357', 'j. magn. reson.': '1090-7807', 'j. magn. reson. a': '1064-1858', 'j. magn. reson. b': '1064-1866', 'j. magn. reson. imag.': '1522-2586', 'j. mater. cycles waste manag.': '1611-8227', 'j. mater. sci.': '0022-2461', 'j. mater. sci. lett.': '0261-8028', 'j. mater. sci.: mater. electron.': '0957-4522', 'j. mater. sci.: mater. med.': '0957-4530', 'j. mater. synth. process.': '1573-4870', 'j. math. imaging vis.': '0924-9907', 'j. math. model. algorithms': '1570-1166', 'j. math. sci.': '1072-3374', 'j. math. teach. educ.': '1573-1820', 'j. microcomput. 
appl.': '0745-7138', 'j. microelectromech. syst.': '1057-7157', 'j. micromechatron.': '1568-5632', 'j. nanomater.': '1687-4129', 'j. nanopart. res.': '1572-896X', 'j. netw. comput. appl.': '1084-8045', 'j. netw. syst. manag.': '1064-7570', 'j. neural eng.': '1741-2560', 'j. non-newton. fluid mech.': '0377-0257', 'j. nondestruct. eval.': '0195-9298', 'j. nucl. energy, c': '0368-3281', 'j. object technol.': '1660-1769', 'j. oper. manage.': '0272-6963', 'j. opt.': '0150-536X', 'j. opt. fiber commun. rep.': '1619-8638', 'j. opt. netw.': '1536-5379', 'j. opt. technol.': '1070-9762', 'j. optim. theory appl.': '0022-3239', 'j. parallel distrib. comput.': '0743-7315', 'j. phys. c': '0022-3719', 'j. phys. chem. a': '0092-7325', 'j. phys. chem. b': '0092-7325', 'j. phys. f': '0305-4608', 'j. phys. stud.': '1027-4642', 'j. phys.: conf. ser.': '1742-6596', 'j. polym. res.': '1022-9760', 'j. porous mater.': '1573-4854', 'j. product. anal.': '1573-0441', 'j. radiat. res.': '0449-3060', 'j. radioanal. nucl. chem.': '1588-2780', 'j. res. natl. inst. stand. technol.': '1044-677X', 'j. res. pract.': '1712-851X', 'j. russ. laser res.': '1573-8760', 'j. sci. commun.': '1824-2049', 'j. sci. comput.': '0885-7474', 'j. sci. instrum.': '0950-7671', 'j. soc. radiol. prot.': '0260-2814', 'j. softw. maint. evol.: res. pract.': '1532-0618', 'j. softw. maint.: res. pract.': '1096-908X', 'j. sound vib.': '0022-460X', 'j. south. acad. spec. librariansh.': '1525-321X', 'j. stat. mech., theory exp.': '1742-5468', 'j. stat. phys.': '1572-9613', 'j. stat. softw.': '1548-7660', 'j. strain anal. eng. des.': '0309-3247', 'j. supercomput.': '0920-8542', 'j. supercond.': '1572-9605', 'j. supercond. novel magn.': '1557-1939', 'j. supercrit. fluids': '0896-8446', 'j. syst. integr.': '1573-8787', 'j. technol. transf.': '0829-9912', 'j. theor. probab.': '0894-9840', 'j. therm. anal. calorim.': '1572-8943', 'j. vis. commun. image represent.': '1047-3203', 'j. vis. comput. animat.': '1099-1778', 'j. vlsi signal process. syst. signal image video technol.': '0922-5773', 'jpn. j. appl. phys.': '1347-4065', 'k-theory': '1573-0514', 'katharine sharp rev.': '1083-5261', 'kek news': '1343-3547', 'lasers med. sci.': '1435-604X', 'lett. math. phys.': '1573-0530', 'libr. philos. pract.': '1522-0222', 'linux j.': '1075-3583', 'lith. math. j.': '0363-1672', 'living rev. sol. phys.': '1614-4961', 'low temp. phys.': '1063-777X', 'mach. learn.': '0885-6125', 'macromol. chem. phys.': '1521-3935', 'macromol. mater. eng.': '1439-2054', 'macromol. rapid commun.': '1521-3927', 'macromol. symp.': '1521-3900', 'macromol. theory simul.': '1521-3919', 'magma magn. reson. mater. phys. biol. med.': '1352-8661', 'magn. reson. imaging': '0730-725X', 'mater. sci.': '1068-820X', 'mater. technol.': '1580-3414', 'math. notes': '0001-4346', 'math. phys. anal. geom.': '1385-0172', 'math. probl. eng.': '1563-5147', 'math. scand.': '0025-5521', 'mc j.': '1069-6792', 'meas. tech.': '0543-1972', 'meccanica': '1572-9648', 'mech. compos. mater.': '1573-8922', 'mech. syst. signal process.': '0888-3270', 'mech. time-depend. mater.': '1573-2738', 'med. phys.': '0094-2405', 'mediterr. j. math.': '1660-5454', 'met. sci. heat treat.': '0026-0673', 'metallurgist': '1573-8892', 'methodol. comput. appl. probab.': '1387-5841', 'metrika': '1436-5057', 'metrologia': '1681-7575', 'microfluid. nanofluid.': '1613-4990', 'micromater. nanomater.': '1619-2486', 'micron': '0968-4328', 'micron (1969-83) [online version]': '0047-7206', 'micron microsc. acta': '0739-6260', 'microw. 
rf': '0745-2993', 'milan j. math.': '1424-9294', 'minds mach.': '1572-8641', 'minerva': '0026-4695', 'mo. j. math. sci.': '0899-6180', 'mob. netw. appl.': '1572-8153', 'mol. eng.': '1572-8951', 'monogr. mat.': '0077-0507', 'monte carlo methods appl.': '1569-3961', 'mrs bull.': '0883-7694', 'multibody syst. dyn.': '1573-272X', 'multidimens. syst. signal process.': '0923-6082', 'multimed. tools appl.': '1380-7501', 'm\xc3\xa9c. ind.': '1296-2139', 'nagoya math. j.': '0027-7630', 'netw. comput.': '1046-4468', 'networks': '1097-0037', 'neural process. lett.': '1370-4621', 'neutron news': '1044-8632', 'new dir. high. educ.': '1522-2322', 'new dir. instit. res.': '1536-075X', 'new dir. stud. serv.': '1536-0695', 'new dir. teach. learn.': '1536-0768', 'nexus netw. j.': '1522-4600', 'nonlinear dyn.': '1573-269X', 'nonlinear phenom. complex syst.': '1561-4085', 'nonprofit couns.': '1520-6785', 'not. am. math. soc.': '1088-9477', 'nouv. rev. opt.': '0335-7368', 'nouv. rev. opt. appl.': '0029-4780', 'ntm int. j. hist. ethics nat. sci. technol. med.': '1420-9144', 'nucl. data sheets': '0090-3752', 'nucl. data sheets, a': '0550-306X', 'nucl. data sheets, b': '0090-550X', 'nucl. eng. des. fusion': '0167-899X', 'nucl. eng. technol.': '1738-5733', 'nucl. fusion': '0029-5515', 'nucl. instrum.': '0369-643X', 'nucl. instrum. methods': '0029-554X', 'nucl. instrum. methods phys. res.': '0167-5087', 'nucl. instrum. methods phys. res., a': '0167-5087', 'nucl. phys.': '0029-5582', 'nucl. phys. news': '1050-6896', 'nucl. struct. eng.': '0369-5816', 'nucl. track detect.': '0145-224X', 'nucl. tracks': '0735-245X', 'nucl. tracks radiat. meas.': '0191-278X', 'nucl. tracks radiat. meas. (1982-85)': '0735-245X', 'nucl. tracks radiat. meas. (1993)': '0969-8078', 'nukleonika': '1508-5791', 'numer. algorithms': '1572-9265', 'numer. methods partial differ. equ.': '1098-2426', 'nuovo cimento, riv.': '0393-697X', 'ocean dyn.': '1616-7228', 'open syst. inf. dyn.': '1230-1612', 'oper. syst. rev.': '0163-5980', 'opt. fiber technol.': '1068-5200', 'opt. netw. mag.': '1572-8161', 'opt. photonics news': '1047-6938', 'opt. quantum electron.': '0306-8919', 'opt. rev.': '1349-9432', 'opt. spectrosc.': '1562-6911', 'opt. switch. netw.': '1573-4277', 'opt. technol.': '0374-3926', 'optik': '0030-4026', 'optim. control appl. methods': '1099-1514', 'optim. eng.': '1389-4420', 'oxid. met.': '0030-770X', 'packag. technol. sci.': '1099-1522', 'pamm': '1617-7061', 'part. part. syst. charact.': '1521-4117', 'period. math. hung.': '1588-2829', 'pers. technol.': '1433-3066', 'pers. ubiquitous comput.': '1617-4917', 'philips j. res.': '0165-5817', 'photonic netw. commun.': '1572-8188', 'photonics nanostruct., fundam. appl.': '1569-4410', 'phys. biol.': '1478-3975', 'phys. earth planet. inter.': '0031-9201', 'phys. fluids (1958-88)': '0031-9171', 'phys. j.': '0031-9279', 'phys. j. indones. phys. soc.': '1410-8860', 'phys. lett.': '0031-9163', 'phys. lett., a': '0375-9601', 'phys. lett., b': '0370-2693', 'phys. life rev.': '1571-0645', 'phys. rev.': '0031-899X', 'phys. rev. (ser. i)': '0031-899X', 'phys. rev. lett.': '0031-9007', 'phys. rev. spec. top. phys. educ. res.': '1554-9178', 'phys. rev., a': '1050-2947;', 'phys. rev., b': '0163-1829', 'phys. rev., c': '0556-2813', 'phys. rev., d': '0556-2821', 'phys. rev., e': '1063-651x', 'phys. status solidi, c': '1610-1642', 'phys. technol.': '0305-4624', 'phys. unserer zeit': '1521-3943', 'physica': '0031-8914', 'physica b c': '0378-4363', 'plasma chem. 
plasma process.': '1572-8986', 'plasma phys.': '0032-1028', 'plasmas ions': '1288-3255', 'plasmas polym.': '1572-8978', 'poiesis prax.': '1615-6617', 'polym. gels netw.': '0966-7822', 'powder metall. met. ceram.': '1068-1302', 'power technol. eng.': '1570-1468', 'prace mat.- fiz.': '0867-5570', 'probab. surv.': '1549-5787', 'probl. inf. transm.': '0032-9460', 'proc. ieee': '0018-9219', 'proc. indian acad. sci., math. sci.': '0253-4142', 'proc. jpn. acad. a': '0386-2194', 'proc. phys. soc. (1926-48)': '0959-5309', 'proc. phys. soc. (1958-67)': '0370-1328', 'proc. phys. soc. lond.': '1478-7814', 'proc. phys. soc., a': '0370-1298', 'proc. phys. soc., b': '0370-1301', 'prog. cryst. growth charact.': '0146-3535', 'prog. nucl. magn. reson. spectrosc.': '0079-6565', 'prog. phys.': '1555-5615', 'prog. theor. phys., suppl.': '0375-9687', 'progr. struct. eng. mater.': '1528-2716', 'program. comput. softw.': '0361-7688', 'propellants explos. pyrotech.': '1521-4087', 'prot. met.': '0033-1732', 'publ. math. ihes': '1618-1913', 'public underst. sci.': '1361-6609', 'pure appl. opt.': '0963-9659', 'qual. assur. j.': '1099-1786', 'qual. reliab. eng. int.': '1099-1638', 'quant. financ.': '1469-7688', 'quantum inf. process.': '1570-0755', 'quantum opt.': '0954-8998', 'quantum semiclass. opt.': '1355-5111', 'queueing syst.': '1572-9443', 'radiat. phys. chem. (1977-85)': '0146-5724', 'radiochemistry': '1608-3288', 'radioisotopes': '0033-8303', 'radiophys. quantum electron.': '1573-9120', 'radioprotection': '1769-700X', 'ramanujan j.': '1572-9303', 'rbm-news': '0222-0776', 'real time imaging': '1077-2014', 'real time syst.': '0922-6443', 'refract. ind. ceram.': '1083-4877', 'reliab. comput.': '1385-3139', 'rend. lincei': '1720-0768', 'rend. lincei sci. fis. nat.': '1720-0776', 'rend. semin. mat.': '0373-1243', 'res. inf.': '1744-8026', 'res. lang. comput.': '1572-8706', 'res. nondestruct. eval.': '1432-2110', 'rev. electron. suisse sci. inf.': '1661-1802', 'rev. g\xc3\xa9n. therm.': '0035-3159', 'rev. mex. fis.': '0035-001X', 'rev. phys. chim. appl. ind.': '1153-9771', 'rheol. acta': '1435-1528', 'risonyt': '0108-0350', 'rom. rep. phys.': '1221-1451', 'rozpr. mat.': '0860-2581', 'russ. j. electrochem.': '1023-1935', 'russ. j. nondestruct. test.': '1061-8309', 'russ. j. numer. anal. math. model.': '1569-3988', 'russ. microelectron.': '1063-7397', 'russ. phys. j.': '1573-9228', 's. afr. j. inf. manag.': '1560-683X', 'sankhya. indian j. stat.': '0036-4452', 'sci. am.': '0036-8733', 'sci. avenir': '0036-8636', 'sci. educ.': '1098-237X', 'sci. soils': '1432-9492', 'sci. vie hors s\xc3\xa9r.': '0151-0282', 'scientometrics': '0138-9130', 'semicond. int.': '0163-3767', 'semicond. phys. quantum electron. optoelectron.': '1605-6582', 'semigroup forum': '0037-1912', 'sens. actuators a': '0924-4247', 'sens. actuators b': '0925-4005', 'sens. update': '1616-8984', 'sensors': '1424-8220', 'ser. enews': '1476-0576', 'serials': '0953-0460', 'sib. math. j.': '0037-4466', 'sigact news': '0163-5700', 'sigbio newsl.': '0163-5697', 'sigcse bull.': '0097-8418', 'sigsam bull.': '0163-5824', 'simul. model. pract. theory': '1569-190X', 'single mol.': '1438-5171', 'softw. eng. notes': '0163-5948', 'softw. focus': '1529-7950', 'softw. process: improv. pract.': '1099-1670', 'softw. qual. j.': '0963-9314', 'softw. syst. model.': '1619-1374', 'softw. test. verif. reliab.': '1099-1689', 'softw.- pract. exp.': '1097-024X', 'sol. syst. res.': '0038-0946', 'solaris': '1265-4876', 'solid state sci.': '1293-2558', 'space sci. 
rev.': '0038-6308', 'sparc open access newsl.': '1546-7821', 'stat. comput.': '0960-3174', 'stat. methods appl.': '1613-981X', 'stat. sci.': '0883-4237', 'stnews': '1040-1229', 'strength mater.': '1573-9325', 'struct. des. tall build.': '1099-1794', 'stud. hist. philos. mod. phys.': '1355-2198', 'studia log.': '1572-8730', 'studia math.': '1730-6337', 'subsurf. sens. technol. appl.': '1573-9317', 'superlattices microstruct.': '0749-6036', 'surf. sci. lett.': '0167-2584', 'surf. technol.': '0376-4583', 'surv. high energy phys.': '0142-2413', 'synthese': '1573-0964', 'syst. comput. jpn.': '1520-684X', 'syst. eng.': '1520-6858', 'taiwan. j. math.': '1027-5487', 'telecommun. syst.': '1018-4864', 'theor. math. phys.': '1573-9333', 'theory comput.': '1557-2862', 'theory pract. object syst.': '1096-9942', 'trans. ire prof. group commun. syst.': '0277-6243', 'trans. ire prof. group compon. parts': '0096-2422', 'trans. ire prof. group ultrason. eng.': '0277-626X', 'trans. jpn. soc. artif. intell.': '1346-8030', 'trans. opt. soc.': '1475-4878', 'tribol. lett.': '1573-2711', 'tsinghua sci. technol.': '1007-0214', 'turk. j. math.': '1300-0098', 'turk. j. phys.': '1300-0101', 'ukr. math. j.': '0041-5995', 'ultrason. imaging': '0161-7346', 'univers. access inf. soc.': '1615-5297', 'upgrade': '1684-5285', 'user model. user adapt. interact.': '1573-1391', 'uspekhi fiz. nauk': '0042-1294', 'vak. forsch. prax.': '1522-2454', 'vine': '1474-1032', 'virtual real.': '1434-9957', 'web semant.': '1570-8268', 'weld. int.': '1573-9449', 'wind energy': '1099-1824', 'wirel. commun. mob. comput.': '1530-8677', 'wirel. netw.': '1022-0038', 'wirel. pers. commun.': '0929-6212', 'world pat. inf.': '0172-2190', 'world wide web': '1386-145X', 'z. anal. anwend.': '0232-2064', 'z. angew. math. mech.': '1521-4001', 'z. krist.gr.': '0044-2968', 'z. phys.': '0044-3328', 'z. phys., c': '0170-9739'} def format_element(bfo): """ Returns the ISSN of the record, if known.
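    For example (illustrative lookup using the mapping above), a record
    whose journal name field contains 'Phys. Rev. Lett. [Online]' is
    normalized to 'phys. rev. lett.' and resolves to '0031-9007'.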
    Note that you HAVE to pre-generate the correspondences
    journal name -> ISSN if you want this element to return something
    (Run python bfe_issn.py -h to get help).
    """
    journal_name = bfo.field(journal_name_tag)

    # Here you might want to process journal name
    # by doing the same operation that has been
    # done when saving the mappings
    journal_name = journal_name.lower().strip()
    if journal_name.endswith("[online]"):
        journal_name = journal_name[:-8].rstrip()

    return issns.get(journal_name, '')

def build_issns_from_distant_site(url):
    """
    Retrieves the ISSNs from a distant Invenio system.
    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.

    @param url: the url to load issn from (in the *exact* form: http://www.mysite.com/)
    """
    ## Parse the results of the http request:
    ## http://cdsweb.cern.ch/search?cc=Periodicals&ot=022,210&of=tm&rg=9000
    pattern_field = re.compile(r'''
                               \D*(?P<docid>\d*)  #document id
                               \s(?P<tag>\d*)__\s\$\$a  #tag
                               (?P<value>.*?)$  #value
                               ''', re.IGNORECASE | re.DOTALL | re.VERBOSE)

    request = '/search?cc=Periodicals&ot=022,210&of=tm'
    try:
        fields = urllib.urlopen(url.rstrip('/') + request).readlines()
    except IOError:
        sys.stderr.write("Error: Could not connect to %s.\n" % url)
        sys.exit(0)

    last_doc_id = None
    last_issn = None
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)

    for field in fields:
        result = pattern_field.search(field)
        if result:
            doc_id = result.group('docid')
            if doc_id != last_doc_id:
                # Reset saved ISSN if we parse new document
                last_issn = None

            tag = result.group('tag')
            if tag == '022':
                # Remember this ISSN
                last_issn = result.group('value')

            elif tag == '210' and last_issn is not None:
                # Found a journal name and issn exists.
                # Depending on how journal names are entered into the
                # database, you might want to do some processing
                # before saving:
                journal = result.group('value')
                journal = journal.lower().strip()
                if journal.endswith("[online]"):
                    journal = journal[:-8].rstrip()

                built_issns[journal] = last_issn

            last_doc_id = doc_id

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)

def build_issns_from_local_site():
    """
    Retrieves the ISSNs from the local database.
    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """
    rec_id_list = perform_request_search(cc='Periodicals', of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)

    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()

                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)

def print_info():
    """ Info on element arguments """
    print """
    Collects ISSN and corresponding journal names from local repository
    and prints archive as dict structure.
    Usage: python bfe_issn.py [Options] [url]
    Example: python bfe_issn.py http://cdsweb.cern.ch/

    Options:
      -h, --help     print this help
      -u, --url      the URL to collect ISSN from
      -v, --version  print version number

    If 'url' is not given, collect from local database, using a faster method.

    Returned structure can then be copied into bfe_issn.py 'format' function.
    """

if __name__ == '__main__':
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hu:v",
                                   ["help",
                                    "url",
                                    "version"])
    except getopt.error:
        print_info()
        sys.exit(0)

    url_arg = None
    for opt, opt_value in opts:
        if opt in ["-u", "--url"]:
            url_arg = opt_value
        elif opt in ["-v", "--version"]:
            print __revision__
            sys.exit(0)
        else:
            print_info()
            sys.exit(0)

    if url_arg is not None:
        build_issns_from_distant_site(url_arg)
    else:
        build_issns_from_local_site()
diff --git a/modules/bibharvest/lib/oai_repository_updater.py b/modules/bibharvest/lib/oai_repository_updater.py
index b4d0660b4..c3110d7ce 100644
--- a/modules/bibharvest/lib/oai_repository_updater.py
+++ b/modules/bibharvest/lib/oai_repository_updater.py
@@ -1,545 +1,543 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""OAI Repository administration tool -

   Updates the metadata of the records to include OAI identifiers and
   OAI SetSpec according to the settings defined in OAI Repository
   admin interface
"""

__revision__ = "$Id$"

import os
import sys
import time

if sys.hexversion < 0x2040000:
    # pylint: disable=W0622
    from sets import Set as set
    # pylint: enable=W0622

from tempfile import mkstemp
from invenio.config import \
     CFG_OAI_ID_FIELD, \
     CFG_OAI_ID_PREFIX, \
     CFG_OAI_SET_FIELD, \
     CFG_BINDIR, \
     CFG_SITE_NAME, \
     CFG_TMPDIR
-from invenio.search_engine import \
-     perform_request_search, \
-     get_fieldvalues, \
-     get_record
+from invenio.search_engine import perform_request_search, get_record
+from invenio.search_engine_utils import get_fieldvalues
from invenio.intbitset import intbitset as HitSet
from invenio.dbquery import run_sql
from invenio.bibtask import \
     task_get_option, \
     task_set_option, \
     write_message, \
     task_update_progress, \
     task_init, \
     task_sleep_now_if_required
from invenio.bibrecord import \
     record_delete_subfield, \
     field_xml_output

DATAFIELD_SET_HEAD = \
    "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">" % \
    (CFG_OAI_SET_FIELD[0:3],
     CFG_OAI_SET_FIELD[3:4].replace('_', ' '),
     CFG_OAI_SET_FIELD[4:5].replace('_', ' '))
DATAFIELD_ID_HEAD = \
    "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">" % \
    (CFG_OAI_ID_FIELD[0:3],
     CFG_OAI_ID_FIELD[3:4].replace('_', ' '),
     CFG_OAI_ID_FIELD[4:5].replace('_', ' '))

def get_set_definitions(set_spec):
    """
    Retrieve set definitions from oaiREPOSITORY table.
    The set definitions are the search patterns that define the records
    which are in the set
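    For example (illustrative value, not from the source), a stored
    setDefinition string may look like
    'c=Preprints;p1=higgs;f1=title;m1=e;op1=a;p2=;f2=;m2=;op2=a;p3=;f3=;m3='
    and is turned into a parameter dictionary by parse_set_definition()
    below.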
    """
    set_definitions = []

    query = "select setName, setDefinition from oaiREPOSITORY where setSpec=%s"
    res = run_sql(query, (set_spec, ))
    for (set_name, set_definition) in res:
        params = parse_set_definition(set_definition)
        params['setSpec'] = set_spec
        params['setName'] = set_name
        set_definitions.append(params)
    return set_definitions

def parse_set_definition(set_definition):
    """
    Returns the parameters for the given set definition.

    The returned structure is a dictionary with keys being
    c, p1, f1, m1, p2, f2, m2, p3, f3, m3 and corresponding values

    @param set_definition: a string as returned by the database for column 'setDefinition'
    @return: a dictionary
    """
    params = {'c':'',
              'p1':'', 'f1':'', 'm1':'',
              'p2':'', 'f2':'', 'm2':'',
              'p3':'', 'f3':'', 'm3':'',
              'op1':'a', 'op2':'a'}
    definitions = set_definition.split(';')
    for definition in definitions:
        arguments = definition.split('=')
        if len(arguments) == 2:
            params[arguments[0]] = arguments[1]
    return params

def all_set_specs():
    """
    Returns the list of (distinct) setSpecs defined in the settings.
    This also includes the "empty" setSpec if any setting uses it.

    Note: the same setSpec can appear several times in the settings,
    given that a setSpec might be defined by several search queries.
    Here we return distinct values
    """
    query = "SELECT DISTINCT setSpec FROM oaiREPOSITORY"
    res = run_sql(query)
    return [row[0] for row in res]

def get_recids_for_set_spec(set_spec):
    """
    Returns the list (as HitSet) of recids belonging to 'set'

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    recids = HitSet()
    for set_def in get_set_definitions(set_spec):
        new_recids = perform_request_search(c=[coll.strip() \
                                               for coll in set_def['c'].split(',')],
                                            p1=set_def['p1'],
                                            f1=set_def['f1'],
                                            m1=set_def['m1'],
                                            op1=set_def['op1'],
                                            p2=set_def['p2'],
                                            f2=set_def['f2'],
                                            m2=set_def['m2'],
                                            op2=set_def['op2'],
                                            p3=set_def['p3'],
                                            f3=set_def['f3'],
                                            m3=set_def['m3'],
                                            ap=0)
        recids = recids.union(HitSet(new_recids))
    return recids

def get_set_name_for_set_spec(set_spec):
    """
    Returns the OAI setName of a setSpec.

    Note that the OAI Repository admin lets the user add several set
    definitions with the same setSpec, and possibly with different
    setNames... -> Returns the first (non-empty) one found.

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 setName
    """
    query = "select setName from oaiREPOSITORY where setSpec=%s and setName!=''"
    res = run_sql(query, (set_spec, ))
    if len(res) > 0:
        return res[0][0]
    else:
        return ""

def print_repository_status(write_message=write_message, verbose=0):
    """
    Prints the repository status to the standard output.

    Parameters:

      write_message - *function* the function used to write the output

            verbose - *int* the verbosity of the output
                       - 0: print repository size
                       - 1: print quick status of each set (numbers can
                         be wrong if the repository is in some
                         inconsistent state, i.e. a record is in an OAI
                         setSpec but has no OAI ID)
                       - 2: print detailed status of repository, with
                         number of records that need to be synchronized
                         according to the sets definitions. Precise,
                         but ~slow...
    """
    repository_size_s = "%d" % repository_size()
    repository_recids_after_update = HitSet()

    write_message(CFG_SITE_NAME)
    write_message(" OAI Repository Status")

    set_spec_max_length = 19 # How many max char do we display for
    set_name_max_length = 20 # setName and setSpec?
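    # Illustrative shape of the verbose > 1 report built below
    # (hypothetical setSpec/setName and numbers, not from the source):
    #
    #  setSpec              setName                Volume     After update(*):
    #                                                         Additions Deletions
    #  cern:theory          Theory Papers            1234       +10    -3 =    1241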
    if verbose == 0:
        # Just print repository size
        write_message(" Total(**)" + " " * 29 +
                      " " * (9 - len(repository_size_s)) + repository_size_s)
        return
    elif verbose == 1:
        # We display less information: show longer set name and spec
        set_spec_max_length = 30
        set_name_max_length = 30

    write_message("=" * 80)
    header = " setSpec" + " " * (set_spec_max_length - 7) + \
             " setName" + " " * (set_name_max_length - 5) + " Volume"
    if verbose > 1:
        header += " " * 5 + "After update(*):"
    write_message(header)

    if verbose > 1:
        write_message(" " * 57 + "Additions Deletions")

    write_message("-" * 80)

    for set_spec in all_set_specs():

        if verbose <= 1:
            # Get the records that are in this set. This is an
            # incomplete check, as it can happen that some records are
            # in this set (according to the metadata) but have no OAI
            # ID (so they are not exported). This can happen if the
            # repository has some records coming from external
            # sources, or if it has never been synchronized with this
            # tool.
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e", ap=0)
            nb_current_recids = len(current_recids)
        else:
            # Get the records that are *currently* exported for this
            # setSpec
            current_recids = perform_request_search(c=CFG_SITE_NAME,
                                                    p1=set_spec,
                                                    f1=CFG_OAI_SET_FIELD,
                                                    m1="e", ap=0, op1="a",
                                                    p2="oai:*",
                                                    f2=CFG_OAI_ID_FIELD,
                                                    m2="e")
            nb_current_recids = len(current_recids)
            # Get the records that *should* be in this set according to
            # the admin defined settings, and compute how many should be
            # added or removed
            should_recids = get_recids_for_set_spec(set_spec)
            repository_recids_after_update = repository_recids_after_update.union(should_recids)

            nb_add_recids = len(HitSet(should_recids).difference(HitSet(current_recids)))
            nb_remove_recids = len(HitSet(current_recids).difference(HitSet(should_recids)))
            nb_should_recids = len(should_recids)
            nb_recids_after_update = len(repository_recids_after_update)

        # Adapt setName and setSpec strings lengths
        set_spec_str = set_spec
        if len(set_spec_str) > set_spec_max_length:
            set_spec_str = "%s.." % set_spec_str[:set_spec_max_length]
        set_name_str = get_set_name_for_set_spec(set_spec)
        if len(set_name_str) > set_name_max_length:
            set_name_str = "%s.." % set_name_str[:set_name_max_length]

        row = " " + set_spec_str + \
              " " * ((set_spec_max_length + 2) - len(set_spec_str)) + set_name_str + \
              " " * ((set_name_max_length + 2) - len(set_name_str)) + \
              " " * (7 - len(str(nb_current_recids))) + str(nb_current_recids)
        if verbose > 1:
            row += \
                " " * max(9 - len(str(nb_add_recids)), 0) + '+' + str(nb_add_recids) + \
                " " * max(7 - len(str(nb_remove_recids)), 0) + '-' + str(nb_remove_recids) + " = " + \
                " " * max(7 - len(str(nb_should_recids)), 0) + str(nb_should_recids)
        write_message(row)

    write_message("=" * 80)
    footer = " Total(**)" + " " * (set_spec_max_length + set_name_max_length - 7) + \
             " " * (9 - len(repository_size_s)) + repository_size_s
    if verbose > 1:
        footer += ' ' * (28 - len(str(nb_recids_after_update))) + str(nb_recids_after_update)
    write_message(footer)

    if verbose > 1:
        write_message(' *The "after update" columns show the repository after you run this tool.')
    else:
        write_message(' *"Volume" is indicative if repository is out of sync. Use --detailed-report.')
    write_message('**The "total" is not the sum of the above numbers, but the union of the records.')

def repository_size():
    "Read repository size"
    return len(perform_request_search(p1="oai:*",
                                      f1=CFG_OAI_ID_FIELD,
                                      m1="e", ap=0))

### MAIN ###

def oairepositoryupdater_task():
    """Main business logic code of oai_archive"""
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    if report > 1:
        print_repository_status(verbose=report)
        return True

    task_update_progress("Fetching records to process")

    # Build the list of records to be processed, that is, search for
    # the records that match one of the search queries defined in OAI
    # Repository admin interface.
    recids_for_set = {} # Remember exactly which record belongs to which set
    recids = HitSet()   # "Flat" set of the recids_for_set values
    for set_spec in all_set_specs():
        task_sleep_now_if_required(can_stop_too=True)
        _recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = _recids
        recids = recids.union(_recids)

    # Also get the list of records that are currently exported through
    # OAI and that might need to be refreshed
    oai_recids = perform_request_search(c=CFG_SITE_NAME,
                                        p1='oai:%s:*' % CFG_OAI_ID_PREFIX,
                                        f1=CFG_OAI_ID_FIELD,
                                        m1="e", ap=0)
    recids = recids.union(HitSet(oai_recids))

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                             prefix='oairepository_' + \
                             time.strftime("%Y%m%d_%H%M%S_",
                                           time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write('<collection>')

    has_updated_records = False

    # Iterate over the recids
    i = 0
    for recid in recids:
        i += 1
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(recids)))

        # Check if an OAI identifier is already in the record or
        # not.
        oai_id_entry = "<subfield code=\"%s\">oai:%s:%s</subfield>\n" % \
                       (CFG_OAI_ID_FIELD[5:6], CFG_OAI_ID_PREFIX, recid)
        already_has_oai_id = True
        oai_ids = [_oai_id for _oai_id in \
                   get_fieldvalues(recid, CFG_OAI_ID_FIELD) \
                   if _oai_id.strip() != '']
        if len(oai_ids) == 0:
            already_has_oai_id = False

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(\
            [_oai_set for _oai_set in \
             get_fieldvalues(recid, CFG_OAI_SET_FIELD) \
             if _oai_set.strip() != ''])

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(\
            [_set for _set, _recids in recids_for_set.iteritems()
             if recid in _recids if _set])

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change. Otherwise apply the new sets.
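        # Sketch of the comparison below (illustrative values, not from
        # the source): with
        #   current_oai_sets = set(['cern:theory'])              # from metadata
        #   updated_oai_sets = set(['cern:theory', 'cern:lhc'])  # from settings
        # the sets differ, so a corrected MARCXML snippet is written out
        # for this record; identical sets plus an existing OAI ID mean
        # there is nothing to do.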
        if current_oai_sets == updated_oai_sets and already_has_oai_id:
            continue # Jump to next recid

        has_updated_records = True

        # Generate the xml sets entry
        oai_set_entry = '\n'.join(["<subfield code=\"%s\">%s</subfield>" % \
                                   (CFG_OAI_SET_FIELD[5:6], _oai_set) \
                                   for _oai_set in updated_oai_sets if \
                                   _oai_set]) + \
                                   "\n"

        # Also get all the datafields with tag and indicator matching
        # CFG_OAI_SET_FIELD[:5] and CFG_OAI_ID_FIELD[:5] but with
        # subcode != CFG_OAI_SET_FIELD[5:6] and subcode !=
        # CFG_OAI_ID_FIELD[5:6], so that we can preserve these values
        other_data = marcxml_filter_out_tags(recid, [CFG_OAI_SET_FIELD,
                                                     CFG_OAI_ID_FIELD])

        if CFG_OAI_ID_FIELD[0:5] == CFG_OAI_SET_FIELD[0:5]:
            # Put set and OAI ID in the same datafield
            oai_out.write("<record>\n")
            oai_out.write("<controlfield tag=\"001\">%s"
                          "</controlfield>\n" % recid)
            oai_out.write(DATAFIELD_ID_HEAD)
            oai_out.write("\n")
            #if oai_id_entry:
            oai_out.write(oai_id_entry)
            #if oai_set_entry:
            oai_out.write(oai_set_entry)
            oai_out.write("</datafield>\n")
            oai_out.write(other_data)
            oai_out.write("</record>\n")
        else:
            oai_out.write("<record>\n")
            oai_out.write("<controlfield tag=\"001\">%s"
                          "</controlfield>\n" % recid)
            oai_out.write(DATAFIELD_ID_HEAD)
            oai_out.write("\n")
            oai_out.write(oai_id_entry)
            oai_out.write("</datafield>\n")
            oai_out.write(DATAFIELD_SET_HEAD)
            oai_out.write("\n")
            oai_out.write(oai_set_entry)
            oai_out.write("</datafield>\n")
            oai_out.write(other_data)
            oai_out.write("</record>\n")

    oai_out.write('</collection>')
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if not no_upload:
        task_sleep_now_if_required(can_stop_too=True)
        if has_updated_records:
            command = "%s/bibupload -c %s -u oairepository" % (CFG_BINDIR, filename)
            os.system(command)
        else:
            os.remove(filename)

    return True

def marcxml_filter_out_tags(recid, fields):
    """
    Returns the fields of record 'recid' that share the same tag and
    indicators as those specified in 'fields', but for which the
    subfield is different. This is nice to emulate a bibupload -c that
    corrects only specific subfields.

    Parameters:

           recid - *int* the id of the record to process

          fields - *list(str)* the list of fields that we want to filter
                   out. Eg ['909COp', '909COo']
    """
    out = ''

    record = get_record(recid)

    # Delete subfields that we want to replace
    for field in fields:
        record_delete_subfield(record,
                               tag=field[0:3],
                               ind1=field[3:4],
                               ind2=field[4:5],
                               subfield_code=field[5:6])

    # Select only datafields that share tag + indicators
    processed_tags_and_ind = []
    for field in fields:
        if not field[0:5] in processed_tags_and_ind:
            # Ensure that we do not process the same datafields twice
            processed_tags_and_ind.append(field[0:5])
            for datafield in record.get(field[0:3], []):
                if datafield[1] == field[3:4].replace('_', ' ') and \
                   datafield[2] == field[4:5].replace('_', ' ') and \
                   datafield[0]:
                    out += field_xml_output(datafield, field[0:3]) + '\n'

    return out

#########################

def main():
    """Main function that constructs the bibtask."""
    # if there is any -r or --report option (or other similar options)
    # in the arguments, just print the status and exit (do not run
    # through BibSched...)
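    # E.g. "oairepositoryupdater -d" below prints the detailed report
    # directly to stdout and returns without submitting a BibSched task.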
    mode = -1
    if '-d' in sys.argv[1:] or '--detailed-report' in sys.argv[1:]:
        mode = 2
    elif '-r' in sys.argv[1:] or '--report' in sys.argv[1:]:
        mode = 1

    if mode != -1:
        def write_message(*args):
            """Overload BibTask function so that it does not need to
            run in BibSched environment"""
            sys.stdout.write(args[0] + '\n')
        print_repository_status(write_message=write_message,
                                verbose=mode)
        return

    task_init(authorization_action='runoairepository',
              authorization_msg="OAI Archive Task Submission",
              description="Examples:\n"
                          " Expose records according to sets defined in OAI Repository admin interface\n"
                          " $ oairepositoryupdater \n"
                          " Expose records according to sets defined in OAI Repository admin interface and update them every day\n"
                          " $ oairepositoryupdater -s24\n"
                          " Print OAI repository status\n"
                          " $ oairepositoryupdater -r\n"
                          " Print OAI repository detailed status\n"
                          " $ oairepositoryupdater -d\n\n",
              help_specific_usage="Options:\n"
                                  " -r --report\t\tOAI repository status\n"
                                  " -d --detailed-report\t\tOAI repository detailed status\n"
                                  " -n --no-process\tDo not upload the modifications\n",
              version=__revision__,
              specific_params=("rdn", [
                  "report",
                  "detailed-report",
                  "no-process"]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=oairepositoryupdater_task)

def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific CLI parameters of oairepositoryupdater"""
    if key in ("-r", "--report"):
        task_set_option("report", 1)
    if key in ("-d", "--detailed-report"):
        task_set_option("report", 2)
    elif key in ("-n", "--no-process"):
        task_set_option("no_upload", 1)
    else:
        return False
    return True

### okay, here we go:
if __name__ == '__main__':
    main()
diff --git a/modules/bibindex/lib/bibindex_engine.py b/modules/bibindex/lib/bibindex_engine.py
index 8c7526730..ffd72221b 100644
--- a/modules/bibindex/lib/bibindex_engine.py
+++ b/modules/bibindex/lib/bibindex_engine.py
@@ -1,1812 +1,1802 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
BibIndex indexing engine implementation.
See bibindex executable for entry point.
""" __revision__ = "$Id$" import os import re import sys import time import urllib2 import logging from invenio.config import \ CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY, \ CFG_BIBINDEX_MIN_WORD_LENGTH, \ CFG_BIBINDEX_REMOVE_HTML_MARKUP, \ CFG_BIBINDEX_REMOVE_LATEX_MARKUP, \ CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES, \ CFG_BIBINDEX_SYNONYM_KBRS, \ CFG_CERN_SITE, CFG_INSPIRE_SITE, \ CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES, \ CFG_BIBINDEX_SPLASH_PAGES, \ CFG_SOLR_URL from invenio.websubmit_config import CFG_WEBSUBMIT_BEST_FORMATS_TO_EXTRACT_TEXT_FROM from invenio.bibindex_engine_config import CFG_MAX_MYSQL_THREADS, \ CFG_MYSQL_THREAD_TIMEOUT, \ CFG_CHECK_MYSQL_THREADS from invenio.bibindex_engine_tokenizer import BibIndexFuzzyNameTokenizer, \ BibIndexExactNameTokenizer from invenio.bibdocfile import bibdocfile_url_p, \ bibdocfile_url_to_bibdoc, normalize_format, \ download_url, guess_format_from_url, BibRecDocs from invenio.websubmit_file_converter import convert_file, get_file_converter_logger from invenio.search_engine import perform_request_search, strip_accents, \ wash_index_term, lower_index_term, get_index_stemming_language, \ get_synonym_terms from invenio.dbquery import run_sql, DatabaseError, serialize_via_marshal, \ deserialize_via_marshal from invenio.bibindex_engine_stopwords import is_stopword from invenio.bibindex_engine_stemmer import stem from invenio.bibtask import task_init, write_message, get_datetime, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress, task_sleep_now_if_required from invenio.intbitset import intbitset from invenio.errorlib import register_exception from invenio.htmlutils import remove_html_markup, get_links_in_html_page from invenio.textutils import wash_for_utf8 +from invenio.search_engine_utils import get_fieldvalues if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 # FIXME: journal tag and journal pubinfo standard format are defined here: if CFG_CERN_SITE: CFG_JOURNAL_TAG = '773__%' CFG_JOURNAL_PUBINFO_STANDARD_FORM = "773__p 773__v (773__y) 773__c" elif CFG_INSPIRE_SITE: CFG_JOURNAL_TAG = '773__%' CFG_JOURNAL_PUBINFO_STANDARD_FORM = "773__p,773__v,773__c" else: CFG_JOURNAL_TAG = '909C4%' CFG_JOURNAL_PUBINFO_STANDARD_FORM = "909C4p 909C4v (909C4y) 909C4c" ## precompile some often-used regexp for speed reasons: re_subfields = re.compile('\$\$\w') re_block_punctuation_begin = re.compile(r"^"+CFG_BIBINDEX_CHARS_PUNCTUATION+"+") re_block_punctuation_end = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION+"+$") re_punctuation = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) re_separators = re.compile(CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS) re_datetime_shift = re.compile("([-\+]{0,1})([\d]+)([dhms])") re_arxiv = re.compile(r'^arxiv:\d\d\d\d\.\d\d\d\d') nb_char_in_line = 50 # for verbose pretty printing chunksize = 1000 # default size of chunks that the records will be treated by base_process_size = 4500 # process base size _last_word_table = None def list_union(list1, list2): "Returns union of the two lists." 
    union_dict = {}
    for e in list1:
        union_dict[e] = 1
    for e in list2:
        union_dict[e] = 1
    return union_dict.keys()

## safety function for killing slow DB threads:
def kill_sleepy_mysql_threads(max_threads=CFG_MAX_MYSQL_THREADS, thread_timeout=CFG_MYSQL_THREAD_TIMEOUT):
    """Check the number of DB threads and if there are more than
    MAX_THREADS of them, kill all threads that are in a sleeping state
    for more than THREAD_TIMEOUT seconds.  (This is useful for working
    around the max_connection problem that appears during indexation in
    some not-yet-understood cases.)  If some threads are to be killed,
    write info into the log file.
    """
    res = run_sql("SHOW FULL PROCESSLIST")
    if len(res) > max_threads:
        for row in res:
            r_id, dummy, dummy, dummy, r_command, r_time, dummy, dummy = row
            if r_command == "Sleep" and int(r_time) > thread_timeout:
                run_sql("KILL %s", (r_id,))
                write_message("WARNING: too many DB threads, killing thread %s" % r_id, verbose=1)
    return

-## MARC-21 tag/field access functions
-def get_fieldvalues(recID, tag):
-    """Returns list of values of the MARC-21 'tag' fields for the record
-    'recID'."""
-    bibXXx = "bib" + tag[0] + tag[1] + "x"
-    bibrec_bibXXx = "bibrec_" + bibXXx
-    query = "SELECT value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id AND tag LIKE %%s" \
-            % (bibXXx, bibrec_bibXXx)
-    res = run_sql(query, (recID, tag))
-    return [row[0] for row in res]
-
def get_associated_subfield_value(recID, tag, value, associated_subfield_code):
    """Return the value of ASSOCIATED_SUBFIELD_CODE, if it exists, for
    record RECID and TAG of value VALUE.  Used by fulltext indexer only.
    Note: TAG must be 6 characters long (tag+ind1+ind2+sfcode),
    otherwise an empty string is returned.
    FIXME: what if many tag values have the same value but different
    associated_subfield_code?  Better use bibrecord library for this.
    """
    out = ""
    if len(tag) != 6:
        return out
    bibXXx = "bib" + tag[0] + tag[1] + "x"
    bibrec_bibXXx = "bibrec_" + bibXXx
    query = """SELECT bb.field_number, b.tag, b.value FROM %s AS b, %s AS bb
                WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id AND tag LIKE %%s%%""" % (bibXXx, bibrec_bibXXx)
    res = run_sql(query, (recID, tag[:-1]))
    field_number = -1
    for row in res:
        if row[1] == tag and row[2] == value:
            field_number = row[0]
    if field_number > 0:
        for row in res:
            if row[0] == field_number and row[1] == tag[:-1] + associated_subfield_code:
                out = row[2]
                break
    return out

def get_field_tags(field):
    """Returns a list of MARC tags for the field code 'field'.
    Returns empty list in case of error.
    Example: field='author', output=['100__%','700__%']."""
    out = []
    query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                ORDER BY ft.score DESC"""
    res = run_sql(query, (field, ))
    return [row[0] for row in res]

def get_words_from_journal_tag(recID, tag):
    """
    Special procedure to extract words from journal tags.  Joins
    title/volume/year/page into a standard form that is also used for
    citations.
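    For example (illustrative values, assuming the default 909C4 journal
    tags), subfields p='Nucl. Phys.', v='B75', y='1974', c='461-470' are
    indexed both as the separate terms 'Nucl. Phys.', 'B75', '1974',
    '461' (the page end is stripped) and as the combined standard form
    'Nucl. Phys. B75 (1974) 461'.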
""" # get all journal tags/subfields: bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.field_number,b.tag,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx) res = run_sql(query, (recID, tag)) # construct journal pubinfo: dpubinfos = {} for row in res: nb_instance, subfield, value = row if subfield.endswith("c"): # delete pageend if value is pagestart-pageend # FIXME: pages may not be in 'c' subfield value = value.split('-', 1)[0] if dpubinfos.has_key(nb_instance): dpubinfos[nb_instance][subfield] = value else: dpubinfos[nb_instance] = {subfield: value} # construct standard format: lwords = [] for dpubinfo in dpubinfos.values(): # index all journal subfields separately for tag,val in dpubinfo.items(): lwords.append(val) # index journal standard format: pubinfo = CFG_JOURNAL_PUBINFO_STANDARD_FORM for tag,val in dpubinfo.items(): pubinfo = pubinfo.replace(tag,val) if CFG_JOURNAL_TAG[:-1] in pubinfo: # some subfield was missing, do nothing pass else: lwords.append(pubinfo) # return list of words and pubinfos: return lwords def get_author_canonical_ids_for_recid(recID): """ Return list of author canonical IDs (e.g. `J.Ellis.1') for the given record. Done by consulting BibAuthorID module. """ from invenio.bibauthorid_personid_tables_utils import get_persons_from_recids lwords = [] res = get_persons_from_recids([recID]) if res is None: ## BibAuthorID is not enabled return lwords else: dpersons, dpersoninfos = res for aid in dpersoninfos.keys(): author_canonical_id = dpersoninfos[aid].get('canonical_id', '') if author_canonical_id: lwords.append(author_canonical_id) return lwords def get_words_from_date_tag(datestring, stemming_language=None): """ Special procedure to index words from tags storing date-like information in format YYYY or YYYY-MM or YYYY-MM-DD. Namely, we are indexing word-terms YYYY, YYYY-MM, YYYY-MM-DD, but never standalone MM or DD. """ out = [] for dateword in datestring.split(): # maybe there are whitespaces, so break these too out.append(dateword) parts = dateword.split('-') for nb in range(1,len(parts)): out.append("-".join(parts[:nb])) return out def get_words_from_fulltext(url_direct_or_indirect, stemming_language=None): """Returns all the words contained in the document specified by URL_DIRECT_OR_INDIRECT with the words being split by various SRE_SEPARATORS regexp set earlier. If FORCE_FILE_EXTENSION is set (e.g. to "pdf", then treat URL_DIRECT_OR_INDIRECT as a PDF file. (This is interesting to index Indico for example.) Note also that URL_DIRECT_OR_INDIRECT may be either a direct URL to the fulltext file or an URL to a setlink-like page body that presents the links to be indexed. In the latter case the URL_DIRECT_OR_INDIRECT is parsed to extract actual direct URLs to fulltext documents, for all knows file extensions as specified by global CONV_PROGRAMS config variable. """ re_perform_ocr = re.compile(CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES) write_message("... reading fulltext files from %s started" % url_direct_or_indirect, verbose=2) try: if bibdocfile_url_p(url_direct_or_indirect): write_message("... %s is an internal document" % url_direct_or_indirect, verbose=2) bibdoc = bibdocfile_url_to_bibdoc(url_direct_or_indirect) perform_ocr = bool(re_perform_ocr.match(bibdoc.get_docname())) write_message("... 
will extract words from %s (docid: %s) %s" % (bibdoc.get_docname(), bibdoc.get_id(), perform_ocr and 'with OCR' or ''), verbose=2) if not bibdoc.has_text(require_up_to_date=True): bibdoc.extract_text(perform_ocr=perform_ocr) if CFG_SOLR_URL: # we are relying on Solr to provide full-text indexing, so do # nothing here (FIXME: dispatch indexing to Solr) return [] else: return get_words_from_phrase(bibdoc.get_text(), stemming_language) else: if CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY: write_message("... %s is external URL but indexing only local files" % url_direct_or_indirect, verbose=2) return [] write_message("... %s is an external URL" % url_direct_or_indirect, verbose=2) urls_to_index = set() for splash_re, url_re in CFG_BIBINDEX_SPLASH_PAGES.iteritems(): if re.match(splash_re, url_direct_or_indirect): write_message("... %s is a splash page (%s)" % (url_direct_or_indirect, splash_re), verbose=2) html = urllib2.urlopen(url_direct_or_indirect).read() urls = get_links_in_html_page(html) write_message("... found these URLs in %s splash page: %s" % (url_direct_or_indirect, ", ".join(urls)), verbose=3) for url in urls: if re.match(url_re, url): write_message("... will index %s (matched by %s)" % (url, url_re), verbose=2) urls_to_index.add(url) if not urls_to_index: urls_to_index.add(url_direct_or_indirect) write_message("... will extract words from %s" % ', '.join(urls_to_index), verbose=2) words = {} for url in urls_to_index: format = guess_format_from_url(url) write_message("... %s format was guessed for %s" % (format, url), verbose=3) tmpdoc = download_url(url, format) file_converter_logger = get_file_converter_logger() old_logging_level = file_converter_logger.getEffectiveLevel() if task_get_task_param("verbose") > 3: file_converter_logger.setLevel(logging.DEBUG) try: try: tmptext = convert_file(tmpdoc, output_format='.txt') text = open(tmptext).read() os.remove(tmptext) if CFG_SOLR_URL: # we are relying on Solr to provide full-text indexing, so do # nothing here (FIXME: dispatch indexing to Solr) tmpwords = [] else: tmpwords = get_words_from_phrase(text, stemming_language) words.update(dict(map(lambda x: (x, 1), tmpwords))) except Exception, e: message = 'ERROR: it\'s impossible to correctly extract words from %s referenced by %s: %s' % (url, url_direct_or_indirect, e) register_exception(prefix=message, alert_admin=True) write_message(message, stream=sys.stderr) finally: os.remove(tmpdoc) if task_get_task_param("verbose") > 3: file_converter_logger.setLevel(old_logging_level) return words.keys() except Exception, e: message = 'ERROR: it\'s impossible to correctly extract words from %s: %s' % (url_direct_or_indirect, e) register_exception(prefix=message, alert_admin=True) write_message(message, stream=sys.stderr) return [] latex_markup_re = re.compile(r"\\begin(\[.+?\])?\{.+?\}|\\end\{.+?}|\\\w+(\[.+?\])?\{(?P.*?)\}|\{\\\w+ (?P.*?)\}") def remove_latex_markup(phrase): ret_phrase = '' index = 0 for match in latex_markup_re.finditer(phrase): ret_phrase += phrase[index:match.start()] ret_phrase += match.group('inside1') or match.group('inside2') or '' index = match.end() ret_phrase += phrase[index:] return ret_phrase def get_nothing_from_phrase(phrase, stemming_language=None): """ A dump implementation of get_words_from_phrase to be used when when a tag should not be indexed (such as when trying to extract phrases from 8564_u).""" return [] def swap_temporary_reindex_tables(index_id, reindex_prefix="tmp_"): """Atomically swap reindexed temporary table with the original one. 
Delete the now-old one.""" write_message("Putting new tmp index tables for id %s into production" % index_id) run_sql( "RENAME TABLE " + "idxWORD%02dR TO old_idxWORD%02dR," % (index_id, index_id) + "%sidxWORD%02dR TO idxWORD%02dR," % (reindex_prefix, index_id, index_id) + "idxWORD%02dF TO old_idxWORD%02dF," % (index_id, index_id) + "%sidxWORD%02dF TO idxWORD%02dF," % (reindex_prefix, index_id, index_id) + "idxPAIR%02dR TO old_idxPAIR%02dR," % (index_id, index_id) + "%sidxPAIR%02dR TO idxPAIR%02dR," % (reindex_prefix, index_id, index_id) + "idxPAIR%02dF TO old_idxPAIR%02dF," % (index_id, index_id) + "%sidxPAIR%02dF TO idxPAIR%02dF," % (reindex_prefix, index_id, index_id) + "idxPHRASE%02dR TO old_idxPHRASE%02dR," % (index_id, index_id) + "%sidxPHRASE%02dR TO idxPHRASE%02dR," % (reindex_prefix, index_id, index_id) + "idxPHRASE%02dF TO old_idxPHRASE%02dF," % (index_id, index_id) + "%sidxPHRASE%02dF TO idxPHRASE%02dF;" % (reindex_prefix, index_id, index_id) ) write_message("Dropping old index tables for id %s" % index_id) run_sql("DROP TABLE old_idxWORD%02dR, old_idxWORD%02dF, old_idxPAIR%02dR, old_idxPAIR%02dF, old_idxPHRASE%02dR, old_idxPHRASE%02dF" % (index_id, index_id, index_id, index_id, index_id, index_id) ) def init_temporary_reindex_tables(index_id, reindex_prefix="tmp_"): """Create reindexing temporary tables.""" write_message("Creating new tmp index tables for id %s" % index_id) run_sql("""DROP TABLE IF EXISTS %sidxWORD%02dF""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxWORD%02dF ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("""DROP TABLE IF EXISTS %sidxWORD%02dR""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxWORD%02dR ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("""DROP TABLE IF EXISTS %sidxPAIR%02dF""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxPAIR%02dF ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("""DROP TABLE IF EXISTS %sidxPAIR%02dR""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxPAIR%02dR ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("""DROP TABLE IF EXISTS %sidxPHRASE%02dF""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxPHRASE%02dF ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("""DROP TABLE IF EXISTS %sidxPHRASE%02dR""" % (reindex_prefix, index_id)) run_sql("""CREATE TABLE %sidxPHRASE%02dR ( id_bibrec mediumint(9) unsigned NOT NULL default '0', termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM""" % (reindex_prefix, index_id)) run_sql("UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00' WHERE id=%s", (index_id,)) latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]') def get_words_from_phrase(phrase, stemming_language=None): """Return 
list of words found in PHRASE.  Note that the phrase is split
       into groups depending on the alphanumeric characters and
       punctuation characters definition present in the config file.
    """
    words = {}
    formulas = []
    if CFG_BIBINDEX_REMOVE_HTML_MARKUP and phrase.find("</") > -1:
        phrase = remove_html_markup(phrase)
    if CFG_BIBINDEX_REMOVE_LATEX_MARKUP:
        formulas = latex_formula_re.findall(phrase)
        phrase = remove_latex_markup(phrase)
        phrase = latex_formula_re.sub(' ', phrase)
    phrase = wash_for_utf8(phrase)
    phrase = lower_index_term(phrase)
    # 1st split phrase into blocks according to whitespace
    for block in strip_accents(phrase).split():
        # 2nd remove leading/trailing punctuation and add block:
        block = re_block_punctuation_begin.sub("", block)
        block = re_block_punctuation_end.sub("", block)
        if block:
            if stemming_language:
                block = apply_stemming_and_stopwords_and_length_check(block, stemming_language)
            if block:
                words[block] = 1
            if re_arxiv.match(block):
                # special case for blocks like `arXiv:1007.5048' where
                # we would like to index the part after the colon
                # regardless of dot or other punctuation characters:
                words[block.split(':', 1)[1]] = 1
            # 3rd break each block into subblocks according to punctuation and add subblocks:
            for subblock in re_punctuation.split(block):
                if stemming_language:
                    subblock = apply_stemming_and_stopwords_and_length_check(subblock, stemming_language)
                if subblock:
                    words[subblock] = 1
                # 4th break each subblock into alphanumeric groups and add groups:
                for alphanumeric_group in re_separators.split(subblock):
                    if stemming_language:
                        alphanumeric_group = apply_stemming_and_stopwords_and_length_check(alphanumeric_group, stemming_language)
                    if alphanumeric_group:
                        words[alphanumeric_group] = 1
    for block in formulas:
        words[block] = 1
    return words.keys()

def get_pairs_from_phrase(phrase, stemming_language=None):
    """Return list of word pairs found in PHRASE.  Note that the phrase
       is split into groups depending on the alphanumeric characters and
       punctuation characters definition present in the config file.
    """
    words = {}
    if CFG_BIBINDEX_REMOVE_HTML_MARKUP and phrase.find("</") > -1:
        phrase = remove_html_markup(phrase)
    if CFG_BIBINDEX_REMOVE_LATEX_MARKUP:
        phrase = remove_latex_markup(phrase)
        phrase = latex_formula_re.sub(' ', phrase)
    phrase = wash_for_utf8(phrase)
    phrase = lower_index_term(phrase)
    # 1st split phrase into blocks according to whitespace
    last_word = ''
    for block in strip_accents(phrase).split():
        # 2nd remove leading/trailing punctuation and add block:
        block = re_block_punctuation_begin.sub("", block)
        block = re_block_punctuation_end.sub("", block)
        if block:
            if stemming_language:
                block = apply_stemming_and_stopwords_and_length_check(block, stemming_language)
            # 3rd break each block into subblocks according to punctuation and add subblocks:
            for subblock in re_punctuation.split(block):
                if stemming_language:
                    subblock = apply_stemming_and_stopwords_and_length_check(subblock, stemming_language)
                if subblock:
                    # 4th break each subblock into alphanumeric groups and add groups:
                    for alphanumeric_group in re_separators.split(subblock):
                        if stemming_language:
                            alphanumeric_group = apply_stemming_and_stopwords_and_length_check(alphanumeric_group, stemming_language)
                        if alphanumeric_group:
                            if last_word:
                                words['%s %s' % (last_word, alphanumeric_group)] = 1
                            last_word = alphanumeric_group
    return words.keys()

phrase_delimiter_re = re.compile(r'[\.:;\?\!]')
space_cleaner_re = re.compile(r'\s+')
def get_phrases_from_phrase(phrase, stemming_language=None):
    """Return list of phrases found in PHRASE.  Note that the phrase is
       split into groups depending on the alphanumeric characters and
       punctuation characters definition present in the config file.
    """
    ## Note that we don't break phrases, they are used for exact style
    ## of searching.
    phrase = wash_for_utf8(phrase)
    return [phrase]
    # (the code below is currently unreachable; kept for reference)
    words = {}
    phrase = strip_accents(phrase)
    # 1st split phrase into blocks according to whitespace
    for block1 in phrase_delimiter_re.split(strip_accents(phrase)):
        block1 = block1.strip()
        if block1 and stemming_language:
            new_words = []
            for block2 in re_punctuation.split(block1):
                block2 = block2.strip()
                if block2:
                    for block3 in block2.split():
                        block3 = block3.strip()
                        if block3:
                            # Note that we don't stem phrases, they
                            # are used for exact style of searching.
                            new_words.append(block3)
            block1 = ' '.join(new_words)
        if block1:
            words[block1] = 1
    return words.keys()

def get_fuzzy_authors_from_phrase(phrase, stemming_language=None):
    """
    Return list of fuzzy phrase-tokens suitable for storing into
    author phrase index.
    """
    author_tokenizer = BibIndexFuzzyNameTokenizer()
    return author_tokenizer.tokenize(phrase)

def get_exact_authors_from_phrase(phrase, stemming_language=None):
    """
    Return list of exact phrase-tokens suitable for storing into
    exact author phrase index.
    """
    author_tokenizer = BibIndexExactNameTokenizer()
    return author_tokenizer.tokenize(phrase)

def get_author_family_name_words_from_phrase(phrase, stemming_language=None):
    """
    Return list of words from author family names, not from his/her
    first names.  The phrase is assumed to be the full author name.
    This is useful for CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES.
    """
    d_family_names = {}
    # first, treat everything before first comma as surname:
    if ',' in phrase:
        d_family_names[phrase.split(',', 1)[0]] = 1
    # second, try fuzzy author tokenizer to find surname variants:
    for name in get_fuzzy_authors_from_phrase(phrase, stemming_language):
        if ',' in name:
            d_family_names[name.split(',', 1)[0]] = 1
    # now extract words from these surnames:
    d_family_names_words = {}
    for family_name in d_family_names.keys():
        for word in get_words_from_phrase(family_name, stemming_language):
            d_family_names_words[word] = 1
    return d_family_names_words.keys()

def apply_stemming_and_stopwords_and_length_check(word, stemming_language):
    """Return WORD after applying stemming and stopword and length checks.
       See the config file in order to influence these.
    """
    # first check against stopwords:
    if is_stopword(word):
        return ""
    # then check the word length:
    if len(word) < CFG_BIBINDEX_MIN_WORD_LENGTH:
        return ""
    # finally, stem the word when configured so:
    if stemming_language:
        word = stem(word, stemming_language)
    return word

def remove_subfields(s):
    "Removes subfields from string, e.g. 'foo $$c bar' becomes 'foo bar'."
    return re_subfields.sub(' ', s)

def get_index_id_from_index_name(index_name):
    """Returns the words/phrase index id for INDEXNAME.
       Returns 0 in case there is no words table for this index.
       Example: index_name='author', output=4."""
    out = 0
    query = """SELECT w.id FROM idxINDEX AS w
                WHERE w.name=%s LIMIT 1"""
    res = run_sql(query, (index_name, ), 1)
    if res:
        out = res[0][0]
    return out

def get_index_name_from_index_id(index_id):
    """Returns the words/phrase index name for INDEXID.
       Returns '' in case there is no words table for this indexid.
       Example: index_id=9, output='fulltext'."""
    res = run_sql("SELECT name FROM idxINDEX WHERE id=%s", (index_id, ))
    if res:
        return res[0][0]
    return ''
def get_index_tags(indexname):
    """Returns the list of tags that are indexed inside INDEXNAME.
       Returns empty list in case there are no tags indexed in this index.
       Note: uses get_field_tags() defined before.
       Example: indexname='author', output=['100__%', '700__%']."""
    out = []
    query = """SELECT f.code FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f
                WHERE w.name=%s AND w.id=wf.id_idxINDEX
                  AND f.id=wf.id_field"""
    res = run_sql(query, (indexname, ))
    for row in res:
        out.extend(get_field_tags(row[0]))
    return out

def get_all_indexes():
    """Returns the list of the names of all defined words indexes.
       Returns empty list in case no word indexes are defined.
       Example: output=['global', 'author']."""
    out = []
    query = """SELECT name FROM idxINDEX"""
    res = run_sql(query)
    for row in res:
        out.append(row[0])
    return out

def split_ranges(parse_string):
    """Parse a string and return the list of ranges."""
    recIDs = []
    ranges = parse_string.split(",")
    for arange in ranges:
        tmp_recIDs = arange.split("-")
        if len(tmp_recIDs) == 1:
            recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[0])])
        else:
            if int(tmp_recIDs[0]) > int(tmp_recIDs[1]): # sanity check
                tmp = tmp_recIDs[0]
                tmp_recIDs[0] = tmp_recIDs[1]
                tmp_recIDs[1] = tmp
            recIDs.append([int(tmp_recIDs[0]), int(tmp_recIDs[1])])
    return recIDs

def get_word_tables(tables):
    """ Given a list of table names it returns a list of tuples
    (index_id, index_name, index_tags).
    If tables is empty it returns the whole list."""
    wordTables = []
    if tables:
        indexes = tables.split(",")
        for index in indexes:
            index_id = get_index_id_from_index_name(index)
            if index_id:
                wordTables.append((index_id, index, get_index_tags(index)))
            else:
                write_message("Error: There is no %s words table." % index, sys.stderr)
    else:
        for index in get_all_indexes():
            index_id = get_index_id_from_index_name(index)
            wordTables.append((index_id, index, get_index_tags(index)))
    return wordTables

def get_date_range(var):
    "Returns the two dates contained in VAR as a (low, high) tuple."
    limits = var.split(",")
    if len(limits) == 1:
        low = get_datetime(limits[0])
        return low, None
    if len(limits) == 2:
        low = get_datetime(limits[0])
        high = get_datetime(limits[1])
        return low, high
    return None, None

def create_range_list(res):
    """Creates a range list from a recID select query result contained
    in res.  The result is expected to have ascending numerical order."""
    if not res:
        return []
    row = res[0]
    if not row:
        return []
    else:
        range_list = [[row, row]]
    for row in res[1:]:
        row_id = row
        if row_id == range_list[-1][1] + 1:
            range_list[-1][1] = row_id
        else:
            range_list.append([row_id, row_id])
    return range_list

def beautify_range_list(range_list):
    """Returns a non-overlapping, maximal range list."""
    ret_list = []
    for new in range_list:
        found = 0
        for old in ret_list:
            if new[0] <= old[0] <= new[1] + 1 or new[0] - 1 <= old[1] <= new[1]:
                old[0] = min(old[0], new[0])
                old[1] = max(old[1], new[1])
                found = 1
                break
        if not found:
            ret_list.append(new)
    return ret_list

def truncate_index_table(index_name):
    """Properly truncate the given index."""
    index_id = get_index_id_from_index_name(index_name)
    if index_id:
        write_message('Truncating %s index table in order to reindex.' % index_name, verbose=2)
        run_sql("UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00' WHERE id=%s", (index_id,))
        run_sql("TRUNCATE idxWORD%02dF" % index_id)
        run_sql("TRUNCATE idxWORD%02dR" % index_id)
        run_sql("TRUNCATE idxPHRASE%02dF" % index_id)
        run_sql("TRUNCATE idxPHRASE%02dR" % index_id)
def update_index_last_updated(index_id, starting_time=None):
    """Update last_updated column of the index table in the database.
    Puts starting time there so that if the task was interrupted for
    record download, the records will be reindexed next time."""
    if starting_time is None:
        return None
    write_message("updating last_updated to %s..." % starting_time, verbose=9)
    return run_sql("UPDATE idxINDEX SET last_updated=%s WHERE id=%s",
                    (starting_time, index_id,))

#def update_text_extraction_date(first_recid, last_recid):
    #"""for all the bibdoc connected to the specified recid, set
    #the text_extraction_date to the task_starting_time."""
    #run_sql("UPDATE bibdoc JOIN bibrec_bibdoc ON id=id_bibdoc SET text_extraction_date=%s WHERE id_bibrec BETWEEN %s AND %s", (task_get_task_param('task_starting_time'), first_recid, last_recid))

class WordTable:
    "A class to hold the words table."

    def __init__(self, index_name, index_id, fields_to_index, table_name_pattern,
            default_get_words_fnc, tag_to_words_fnc_map, wash_index_terms=50,
            is_fulltext_index=False):
        """Creates words table instance.
        @param index_name: the index name
        @param index_id: the index integer identifier
        @param fields_to_index: a list of fields to index
        @param table_name_pattern: i.e. idxWORD%02dF or idxPHRASE%02dF
        @param default_get_words_fnc: the default function called to
            extract words from a metadata field
        @param tag_to_words_fnc_map: a mapping to specify a particular
            function to extract words from particular metadata (such as 8564_u)
        @param wash_index_terms: do we wash index terms, and if yes (when >0),
            how many characters do we keep in the index terms; see
            max_char_length parameter of wash_index_term()
        """
        self.index_name = index_name
        self.index_id = index_id
        self.tablename = table_name_pattern % index_id
        self.recIDs_in_mem = []
        self.fields_to_index = fields_to_index
        self.value = {}
        self.stemming_language = get_index_stemming_language(index_id)
        self.is_fulltext_index = is_fulltext_index
        self.wash_index_terms = wash_index_terms

        # tagToFunctions mapping.  It offers an indirection level necessary for
        # indexing fulltext.  The default is get_words_from_phrase
        self.tag_to_words_fnc_map = tag_to_words_fnc_map
        self.default_get_words_fnc = default_get_words_fnc

        if self.stemming_language and self.tablename.startswith('idxWORD'):
            write_message('%s has stemming enabled, language %s' % (self.tablename, self.stemming_language))

    def get_field(self, recID, tag):
        """Returns list of values of the MARC-21 'tag' fields for the
           record 'recID'."""
        out = []
        bibXXx = "bib" + tag[0] + tag[1] + "x"
        bibrec_bibXXx = "bibrec_" + bibXXx
        query = """SELECT value FROM %s AS b, %s AS bb
                WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id
                AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
        res = run_sql(query, (recID, tag))
        for row in res:
            out.append(row[0])
        return out

    def clean(self):
        "Cleans the words table."
        self.value = {}

    def put_into_db(self, mode="normal"):
        """Updates the current words table in the corresponding DB
           idxFOO table.  Mode 'normal' means normal execution,
           mode 'emergency' means the word index is reverted to its
           old state.
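
           In 'normal' mode the flush proceeds roughly as follows (a
           summary of the code below, not a specification): the existing
           reverse-table rows for the records in memory are first marked
           CURRENT->TEMPORARY, the in-memory forward lists are flushed
           word by word, and finally the FUTURE rows are promoted to
           CURRENT while the TEMPORARY rows are deleted.  In 'emergency'
           mode the TEMPORARY rows are promoted back to CURRENT and the
           FUTURE rows are deleted, reverting the index to its pre-flush
           state.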
""" write_message("%s %s wordtable flush started" % (self.tablename, mode)) write_message('...updating %d words into %s started' % \ (len(self.value), self.tablename)) task_update_progress("%s flushed %d/%d words" % (self.tablename, 0, len(self.value))) self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='TEMPORARY' WHERE id_bibrec BETWEEN %%s AND %%s AND type='CURRENT'""" % self.tablename[:-1] write_message(query % (group[0], group[1]), verbose=9) run_sql(query, (group[0], group[1])) nb_words_total = len(self.value) nb_words_report = int(nb_words_total/10.0) nb_words_done = 0 for word in self.value.keys(): self.put_word_into_db(word) nb_words_done += 1 if nb_words_report != 0 and ((nb_words_done % nb_words_report) == 0): write_message('......processed %d/%d words' % (nb_words_done, nb_words_total)) task_update_progress("%s flushed %d/%d words" % (self.tablename, nb_words_done, nb_words_total)) write_message('...updating %d words into %s ended' % \ (nb_words_total, self.tablename)) write_message('...updating reverse table %sR started' % self.tablename[:-1]) if mode == "normal": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN %%s AND %%s AND type='FUTURE'""" % self.tablename[:-1] write_message(query % (group[0], group[1]), verbose=9) run_sql(query, (group[0], group[1])) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN %%s AND %%s AND type='TEMPORARY'""" % self.tablename[:-1] write_message(query % (group[0], group[1]), verbose=9) run_sql(query, (group[0], group[1])) #if self.is_fulltext_index: #update_text_extraction_date(group[0], group[1]) write_message('End of updating wordTable into %s' % self.tablename, verbose=9) elif mode == "emergency": for group in self.recIDs_in_mem: query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec BETWEEN %%s AND %%s AND type='TEMPORARY'""" % self.tablename[:-1] write_message(query % (group[0], group[1]), verbose=9) run_sql(query, (group[0], group[1])) query = """DELETE FROM %sR WHERE id_bibrec BETWEEN %%s AND %%s AND type='FUTURE'""" % self.tablename[:-1] write_message(query % (group[0], group[1]), verbose=9) run_sql(query, (group[0], group[1])) write_message('End of emergency flushing wordTable into %s' % self.tablename, verbose=9) write_message('...updating reverse table %sR ended' % self.tablename[:-1]) self.clean() self.recIDs_in_mem = [] write_message("%s %s wordtable flush ended" % (self.tablename, mode)) task_update_progress("%s flush ended" % (self.tablename)) def load_old_recIDs(self, word): """Load existing hitlist for the word from the database index files.""" query = "SELECT hitlist FROM %s WHERE term=%%s" % self.tablename res = run_sql(query, (word,)) if res: return intbitset(res[0][0]) else: return None def merge_with_old_recIDs(self, word, set): """Merge the system numbers stored in memory (hash of recIDs with value +1 or -1 according to whether to add/delete them) with those stored in the database index and received in set universe of recIDs for the given word. Return False in case no change was done to SET, return True in case SET was changed. 
""" oldset = intbitset(set) set.update_with_signs(self.value[word]) return set != oldset def put_word_into_db(self, word): """Flush a single word to the database and delete it from memory""" set = self.load_old_recIDs(word) if set is not None: # merge the word recIDs found in memory: if not self.merge_with_old_recIDs(word,set): # nothing to update: write_message("......... unchanged hitlist for ``%s''" % word, verbose=9) pass else: # yes there were some new words: write_message("......... updating hitlist for ``%s''" % word, verbose=9) run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % self.tablename, (set.fastdump(), word)) else: # the word is new, will create new set: write_message("......... inserting hitlist for ``%s''" % word, verbose=9) set = intbitset(self.value[word].keys()) try: run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % self.tablename, (word, set.fastdump())) except Exception, e: ## We send this exception to the admin only when is not ## already reparing the problem. register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), set, e), alert_admin=(task_get_option('cmd') != 'repair')) if not set: # never store empty words run_sql("DELETE from %s WHERE term=%%s" % self.tablename, (word,)) del self.value[word] def display(self): "Displays the word table." keys = self.value.keys() keys.sort() for k in keys: write_message("%s: %s" % (k, self.value[k])) def count(self): "Returns the number of words in the table." return len(self.value) def info(self): "Prints some information on the words table." write_message("The words table contains %d words." % self.count()) def lookup_words(self, word=""): "Lookup word from the words table." if not word: done = 0 while not done: try: word = raw_input("Enter word: ") done = 1 except (EOFError, KeyboardInterrupt): return if self.value.has_key(word): write_message("The word '%s' is found %d times." \ % (word, len(self.value[word]))) else: write_message("The word '%s' does not exist in the word file."\ % word) def add_recIDs(self, recIDs, opt_flush): """Fetches records which id in the recIDs range list and adds them to the wordTable. The recIDs range list is of the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]]. 
""" global chunksize, _last_word_table flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+opt_flush-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.chk_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) register_exception(alert_admin=True) task_update_status("ERROR") self.put_into_db() sys.exit(1) write_message("%s adding records #%d-#%d started" % \ (self.tablename, i_low, i_high)) if CFG_CHECK_MYSQL_THREADS: kill_sleepy_mysql_threads() task_update_progress("%s adding recs %d-%d" % (self.tablename, i_low, i_high)) self.del_recID_range(i_low, i_high) just_processed = self.add_recID_range(i_low, i_high) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + just_processed write_message("%s adding records #%d-#%d ended " % \ (self.tablename, i_low, i_high)) if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= opt_flush: self.put_into_db() self.clean() write_message("%s backing up" % (self.tablename)) flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db() self.log_progress(time_started,records_done,records_to_go) def add_recIDs_by_date(self, dates, opt_flush): """Add records that were modified between DATES[0] and DATES[1]. If DATES is not set, then add records that were modified since the last update of the index. """ if not dates: table_id = self.tablename[-3:-1] query = """SELECT last_updated FROM idxINDEX WHERE id=%s""" res = run_sql(query, (table_id, )) if not res: return if not res[0][0]: dates = ("0000-00-00", None) else: dates = (res[0][0], None) if dates[1] is None: res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s""", (dates[0],))) if self.is_fulltext_index: res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date >= %s AND status<>'DELETED'""", (dates[0], ))) elif dates[0] is None: res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date <= %s""", (dates[1],))) if self.is_fulltext_index: res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date <= %s AND status<>'DELETED'""", (dates[1], ))) else: res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s AND b.modification_date <= %s""", (dates[0], dates[1]))) if self.is_fulltext_index: res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE text_extraction_date <= modification_date AND modification_date >= %s AND modification_date <= %s AND status<>'DELETED'""", (dates[0], dates[1], ))) alist = create_range_list(list(res)) if not alist: write_message( "No new records added. 
%s is up to date" % self.tablename) else: self.add_recIDs(alist, opt_flush) # special case of author indexes where we need to re-index # those records that were affected by changed BibAuthorID # attributions: if self.index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'): from invenio.bibauthorid_personid_tables_utils import get_recids_affected_since # dates[1] is ignored, since BibAuthorID API does not offer upper limit search alist = create_range_list(get_recids_affected_since(dates[0])) if not alist: write_message( "No new records added by author canonical IDs. %s is up to date" % self.tablename) else: self.add_recIDs(alist, opt_flush) def add_recID_range(self, recID1, recID2): """Add records from RECID1 to RECID2.""" wlist = {} self.recIDs_in_mem.append([recID1,recID2]) # special case of author indexes where we also add author # canonical IDs: if self.index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'): for recID in range(recID1, recID2 + 1): if not wlist.has_key(recID): wlist[recID] = [] wlist[recID] = list_union(get_author_canonical_ids_for_recid(recID), wlist[recID]) # special case of journal index: if self.fields_to_index == [CFG_JOURNAL_TAG]: # FIXME: quick hack for the journal index; a special # treatment where we need to associate more than one # subfield into indexed term for recID in range(recID1, recID2 + 1): new_words = get_words_from_journal_tag(recID, self.fields_to_index[0]) if not wlist.has_key(recID): wlist[recID] = [] wlist[recID] = list_union(new_words, wlist[recID]) else: # usual tag-by-tag indexing: for tag in self.fields_to_index: get_words_function = self.tag_to_words_fnc_map.get(tag, self.default_get_words_fnc) bibXXx = "bib" + tag[0] + tag[1] + "x" bibrec_bibXXx = "bibrec_" + bibXXx query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb WHERE bb.id_bibrec BETWEEN %%s AND %%s AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx) res = run_sql(query, (recID1, recID2, tag)) if tag == '8564_u': ## FIXME: Quick hack to be sure that hidden files are ## actually indexed. res = set(res) for recid in xrange(int(recID1), int(recID2) + 1): for bibdocfile in BibRecDocs(recid).list_latest_files(): res.add((recid, bibdocfile.get_url())) for row in res: recID,phrase = row if not wlist.has_key(recID): wlist[recID] = [] new_words = get_words_function(phrase, stemming_language=self.stemming_language) # ,self.separators wlist[recID] = list_union(new_words, wlist[recID]) # lookup index-time synonyms: if CFG_BIBINDEX_SYNONYM_KBRS.has_key(self.index_name): if len(wlist) == 0: return 0 recIDs = wlist.keys() for recID in recIDs: for word in wlist[recID]: word_synonyms = get_synonym_terms(word, CFG_BIBINDEX_SYNONYM_KBRS[self.index_name][0], CFG_BIBINDEX_SYNONYM_KBRS[self.index_name][1]) if word_synonyms: wlist[recID] = list_union(word_synonyms, wlist[recID]) # were there some words for these recIDs found? if len(wlist) == 0: return 0 recIDs = wlist.keys() for recID in recIDs: # was this record marked as deleted? if "DELETED" in self.get_field(recID, "980__c"): wlist[recID] = [] write_message("... record %d was declared deleted, removing its word list" % recID, verbose=9) write_message("... record %d, termlist: %s" % (recID, wlist[recID]), verbose=9) # put words into reverse index table with FUTURE status: for recID in recIDs: run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % self.tablename[:-1], (recID, serialize_via_marshal(wlist[recID]))) # ... 
and, for new records, enter the CURRENT status as empty:
            try:
                run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % self.tablename[:-1],
                        (recID, serialize_via_marshal([])))
            except DatabaseError:
                # okay, it's an already existing record, no problem
                pass

        # put words into memory word list:
        put = self.put
        for recID in recIDs:
            for w in wlist[recID]:
                put(recID, w, 1)

        return len(recIDs)

    def log_progress(self, start, done, todo):
        """Calculate progress and store it.
        start: start time,
        done: records processed,
        todo: total number of records"""
        time_elapsed = time.time() - start
        # consistency check
        if time_elapsed == 0 or done > todo:
            return

        time_recs_per_min = done / (time_elapsed / 60.0)
        write_message("%d records took %.1f seconds to complete. (%.1f recs/min)" \
            % (done, time_elapsed, time_recs_per_min))

        if time_recs_per_min:
            write_message("Estimated runtime: %.1f minutes" % \
                ((todo - done) / time_recs_per_min))

    def put(self, recID, word, sign):
        """Adds/deletes a word to the word list."""
        try:
            if self.wash_index_terms:
                word = wash_index_term(word, self.wash_index_terms)
            if self.value.has_key(word):
                # the word 'word' exists already: update sign
                self.value[word][recID] = sign
            else:
                self.value[word] = {recID: sign}
        except:
            write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID))

    def del_recIDs(self, recIDs):
        """Fetches records whose id is in the recIDs range list and
        deletes them from the word table.  The recIDs range list is of
        the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
        """
        count = 0
        for arange in recIDs:
            self.del_recID_range(arange[0], arange[1])
            count = count + arange[1] - arange[0]
        self.put_into_db()

    def del_recID_range(self, low, high):
        """Deletes records with 'recID' system number between low
           and high from memory words index table."""
        write_message("%s fetching existing words for records #%d-#%d started" % \
            (self.tablename, low, high), verbose=3)
        self.recIDs_in_mem.append([low, high])
        query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec
        BETWEEN %%s AND %%s""" % (self.tablename[:-1])
        recID_rows = run_sql(query, (low, high))
        for recID_row in recID_rows:
            recID = recID_row[0]
            wlist = deserialize_via_marshal(recID_row[1])
            for word in wlist:
                self.put(recID, word, -1)
        write_message("%s fetching existing words for records #%d-#%d ended" % \
            (self.tablename, low, high), verbose=3)

    def report_on_table_consistency(self):
        """Check reverse words index tables (e.g. idxWORD01R) for
        interesting states such as 'TEMPORARY' state.
        Prints small report (no of words, no of bad words).
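        Returns the number of records found in a non-CURRENT state
        (0 when the reverse table is fully consistent).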
""" # find number of words: query = """SELECT COUNT(*) FROM %s""" % (self.tablename) res = run_sql(query, None, 1) if res: nb_words = res[0][0] else: nb_words = 0 # find number of records: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_records = res[0][0] else: nb_records = 0 # report stats: write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records)) # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query) if res: nb_bad_records = res[0][0] else: nb_bad_records = 999999999 if nb_bad_records: write_message("EMERGENCY: %s needs to repair %d of %d index records" % \ (self.tablename, nb_bad_records, nb_records)) else: write_message("%s is in consistent state" % (self.tablename)) return nb_bad_records def repair(self, opt_flush): """Repair the whole table""" # find possible bad states in reverse tables: query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]) res = run_sql(query, None, 1) if res: nb_bad_records = res[0][0] else: nb_bad_records = 0 if nb_bad_records == 0: return query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT'""" \ % (self.tablename[:-1]) res = intbitset(run_sql(query)) recIDs = create_range_list(list(res)) flush_count = 0 records_done = 0 records_to_go = 0 for arange in recIDs: records_to_go = records_to_go + arange[1] - arange[0] + 1 time_started = time.time() # will measure profile time for arange in recIDs: i_low = arange[0] chunksize_count = 0 while i_low <= arange[1]: # calculate chunk group of recIDs and treat it: i_high = min(i_low+opt_flush-flush_count-1,arange[1]) i_high = min(i_low+chunksize-chunksize_count-1, i_high) try: self.fix_recID_range(i_low, i_high) except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) register_exception(alert_admin=True) task_update_status("ERROR") self.put_into_db() sys.exit(1) flush_count = flush_count + i_high - i_low + 1 chunksize_count = chunksize_count + i_high - i_low + 1 records_done = records_done + i_high - i_low + 1 if chunksize_count >= chunksize: chunksize_count = 0 # flush if necessary: if flush_count >= opt_flush: self.put_into_db("emergency") self.clean() flush_count = 0 self.log_progress(time_started,records_done,records_to_go) # iterate: i_low = i_high + 1 if flush_count > 0: self.put_into_db("emergency") self.log_progress(time_started,records_done,records_to_go) write_message("%s inconsistencies repaired." % self.tablename) def chk_recID_range(self, low, high): """Check if the reverse index table is in proper state""" ## check db query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT' AND id_bibrec BETWEEN %%s AND %%s""" % self.tablename[:-1] res = run_sql(query, (low, high), 1) if res[0][0]==0: write_message("%s for %d-%d is in consistent state" % (self.tablename,low,high)) return # okay, words table is consistent ## inconsistency detected! write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename) error_message = "Errors found. You should check consistency of the " \ "%s - %sR tables.\nRunning 'bibindex --repair' is " \ "recommended." % (self.tablename, self.tablename[:-1]) write_message("EMERGENCY: " + error_message, stream=sys.stderr) raise StandardError, error_message def fix_recID_range(self, low, high): """Try to fix reverse index database consistency (e.g. 
table idxWORD01R) in the low,high doc-id range.

        Possible states for a recID follow:
        CUR TMP FUT: very bad things have happened: warn!
        CUR TMP    : very bad things have happened: warn!
        CUR     FUT: delete FUT (crash before flushing)
        CUR        : database is ok
            TMP FUT: add TMP to memory and del FUT from memory
                     flush (revert to old state)
            TMP    : very bad things have happened: warn!
                FUT: very bad things have happened: warn!
        """
        state = {}
        query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN %%s AND %%s" \
                % self.tablename[:-1]
        res = run_sql(query, (low, high))
        for row in res:
            if not state.has_key(row[0]):
                state[row[0]] = []
            state[row[0]].append(row[1])

        ok = 1 # will hold info on whether we will be able to repair
        for recID in state.keys():
            if not 'TEMPORARY' in state[recID]:
                if 'FUTURE' in state[recID]:
                    if 'CURRENT' not in state[recID]:
                        write_message("EMERGENCY: Index record %d is in inconsistent state. Can't repair it." % recID)
                        ok = 0
                    else:
                        write_message("EMERGENCY: Inconsistency in index record %d detected" % recID)
                        query = """DELETE FROM %sR
                        WHERE id_bibrec=%%s""" % self.tablename[:-1]
                        run_sql(query, (recID, ))
                        write_message("EMERGENCY: Inconsistency in record %d repaired." % recID)
            else:
                if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]:
                    self.recIDs_in_mem.append([recID, recID])

                    # Get the words file
                    query = """SELECT type,termlist FROM %sR
                    WHERE id_bibrec=%%s""" % self.tablename[:-1]
                    write_message(query, verbose=9)
                    res = run_sql(query, (recID, ))
                    for row in res:
                        wlist = deserialize_via_marshal(row[1])
                        write_message("Words are %s " % wlist, verbose=9)
                        if row[0] == 'TEMPORARY':
                            sign = 1
                        else:
                            sign = -1
                        for word in wlist:
                            self.put(recID, word, sign)
                else:
                    write_message("EMERGENCY: %s for %d is in inconsistent "
                        "state. Couldn't repair it." % (self.tablename, recID),
                        stream=sys.stderr)
                    ok = 0

        if not ok:
            error_message = "Unrepairable errors found. You should check " \
                "consistency of the %s - %sR tables. Deleting affected " \
                "TEMPORARY and FUTURE entries from these tables is " \
                "recommended; see the BibIndex Admin Guide." % \
                (self.tablename, self.tablename[:-1])
            write_message("EMERGENCY: " + error_message, stream=sys.stderr)
            raise StandardError, error_message

def main():
    """Main that constructs all the bibtask."""
    task_init(authorization_action='runbibindex',
              authorization_msg="BibIndex Task Submission",
              description="""Examples:
\t%s -a -i 234-250,293,300-500 -u admin@localhost
\t%s -a -w author,fulltext -M 8192 -v3
\t%s -d -m +4d -A on --flush=10000\n""" % ((sys.argv[0],) * 3),
              help_specific_usage="""  Indexing options:
  -a, --add\t\tadd or update words for selected records
  -d, --del\t\tdelete words for selected records
  -i, --id=low[-high]\t\tselect according to doc recID
  -m, --modified=from[,to]\tselect according to modification date
  -c, --collection=c1[,c2]\tselect according to collection
  -R, --reindex\treindex the selected indexes from scratch

  Repairing options:
  -k, --check\t\tcheck consistency for all records in the table(s)
  -r, --repair\t\ttry to repair all records in the table(s)

  Specific options:
  -w, --windex=w1[,w2]\tword/phrase indexes to consider (all)
  -M, --maxmem=XXX\tmaximum memory usage in kB (no limit)
  -f, --flush=NNN\t\tfull consistent table flush after NNN records (10000)
""",
              version=__revision__,
              specific_params=("adi:m:c:w:krRM:f:", [
                  "add",
                  "del",
                  "id=",
                  "modified=",
                  "collection=",
                  "windex=",
                  "check",
                  "repair",
                  "reindex",
                  "maxmem=",
                  "flush=",
              ]),
              task_stop_helper_fnc=task_stop_table_close_fnc,
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core,
              task_submit_check_options_fnc=task_submit_check_options)

def task_submit_check_options():
    """Check for options compatibility."""
    if task_get_option("reindex"):
        if task_get_option("cmd") != "add" or task_get_option('id') or task_get_option('collection'):
            print >> sys.stderr, "ERROR: You can use --reindex only when adding modified records."
            return False
    return True

def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Given the string key, checks its meaning and, possibly using the
    value, fills the corresponding key in the options dict.
    It must return True if it has elaborated the key, False if it
    doesn't know that key.
    eg:
    if key in ['-n', '--number']:
        self.options['number'] = value
        return True
    return False
    """
    if key in ("-a", "--add"):
        task_set_option("cmd", "add")
        if ("-d", "") in opts or ("--del", "") in opts:
            raise StandardError, "Cannot have --add and --del at the same time!"
    elif key in ("-k", "--check"):
        task_set_option("cmd", "check")
    elif key in ("-r", "--repair"):
        task_set_option("cmd", "repair")
    elif key in ("-d", "--del"):
        task_set_option("cmd", "del")
    elif key in ("-i", "--id"):
        task_set_option('id', task_get_option('id') + split_ranges(value))
    elif key in ("-m", "--modified"):
        task_set_option("modified", get_date_range(value))
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-R", "--reindex"):
        task_set_option("reindex", True)
    elif key in ("-w", "--windex"):
        task_set_option("windex", value)
    elif key in ("-M", "--maxmem"):
        task_set_option("maxmem", int(value))
        if task_get_option("maxmem") < base_process_size + 1000:
            raise StandardError, "Memory usage should be higher than %d kB" % \
                (base_process_size + 1000)
    elif key in ("-f", "--flush"):
        task_set_option("flush", int(value))
    else:
        return False
    return True

def task_stop_table_close_fnc():
    """ Close any open word table upon STOP.
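    If a word table is currently being filled, its in-memory content is
    flushed to the database, so that the interrupted run can later be
    completed or repaired instead of losing the pending updates.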
""" global _last_word_table if _last_word_table: _last_word_table.put_into_db() def task_run_core(): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task prints Fibonacci numbers for up to NUM on the stdout, and some messages on stderr. Return 1 in case of success and 0 in case of failure.""" global _last_word_table if task_get_option("cmd") == "check": wordTables = get_word_tables(task_get_option("windex")) for index_id, index_name, index_tags in wordTables: if index_name == 'year' and CFG_INSPIRE_SITE: fnc_get_words_from_phrase = get_words_from_date_tag elif index_name in ('author', 'firstauthor') and \ CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES: fnc_get_words_from_phrase = get_author_family_name_words_from_phrase else: fnc_get_words_from_phrase = get_words_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern='idxWORD%02dF', default_get_words_fnc=fnc_get_words_from_phrase, tag_to_words_fnc_map={'8564_u': get_words_from_fulltext}, wash_index_terms=50) _last_word_table = wordTable wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) if index_name in ('author', 'firstauthor') and \ CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES: fnc_get_pairs_from_phrase = get_pairs_from_phrase # FIXME else: fnc_get_pairs_from_phrase = get_pairs_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern='idxPAIR%02dF', default_get_words_fnc=fnc_get_pairs_from_phrase, tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, wash_index_terms=100) _last_word_table = wordTable wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) if index_name in ('author', 'firstauthor'): fnc_get_phrases_from_phrase = get_fuzzy_authors_from_phrase elif index_name in ('exactauthor', 'exactfirstauthor'): fnc_get_phrases_from_phrase = get_exact_authors_from_phrase else: fnc_get_phrases_from_phrase = get_phrases_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern='idxPHRASE%02dF', default_get_words_fnc=fnc_get_phrases_from_phrase, tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, wash_index_terms=0) _last_word_table = wordTable wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) _last_word_table = None return True # Let's work on single words! 
wordTables = get_word_tables(task_get_option("windex")) for index_id, index_name, index_tags in wordTables: is_fulltext_index = index_name == 'fulltext' reindex_prefix = "" if task_get_option("reindex"): reindex_prefix = "tmp_" init_temporary_reindex_tables(index_id, reindex_prefix) if index_name == 'year' and CFG_INSPIRE_SITE: fnc_get_words_from_phrase = get_words_from_date_tag elif index_name in ('author', 'firstauthor') and \ CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES: fnc_get_words_from_phrase = get_author_family_name_words_from_phrase else: fnc_get_words_from_phrase = get_words_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern=reindex_prefix + 'idxWORD%02dF', default_get_words_fnc=fnc_get_words_from_phrase, tag_to_words_fnc_map={'8564_u': get_words_from_fulltext}, is_fulltext_index=is_fulltext_index, wash_index_terms=50) _last_word_table = wordTable wordTable.report_on_table_consistency() try: if task_get_option("cmd") == "del": if task_get_option("id"): wordTable.del_recIDs(task_get_option("id")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Missing IDs of records to delete from " \ "index %s." % wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message elif task_get_option("cmd") == "add": if task_get_option("id"): wordTable.add_recIDs(task_get_option("id"), task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range, task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: wordTable.add_recIDs_by_date(task_get_option("modified"), task_get_option("flush")) ## here we used to update last_updated info, if run via automatic mode; ## but do not update here anymore, since idxPHRASE will be acted upon later task_sleep_now_if_required(can_stop_too=True) elif task_get_option("cmd") == "repair": wordTable.repair(task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Invalid command found processing %s" % \ wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) register_exception(alert_admin=True) task_update_status("ERROR") if _last_word_table: _last_word_table.put_into_db() sys.exit(1) wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) # Let's work on pairs now if index_name in ('author', 'firstauthor') and \ CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES: fnc_get_pairs_from_phrase = get_pairs_from_phrase # FIXME else: fnc_get_pairs_from_phrase = get_pairs_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern=reindex_prefix + 'idxPAIR%02dF', default_get_words_fnc=fnc_get_pairs_from_phrase, tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, wash_index_terms=100) _last_word_table = wordTable 
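            # (_last_word_table lets task_stop_table_close_fnc() flush this
            # table's in-memory content if the task is stopped mid-pass.)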
wordTable.report_on_table_consistency() try: if task_get_option("cmd") == "del": if task_get_option("id"): wordTable.del_recIDs(task_get_option("id")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Missing IDs of records to delete from " \ "index %s." % wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message elif task_get_option("cmd") == "add": if task_get_option("id"): wordTable.add_recIDs(task_get_option("id"), task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range, task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: wordTable.add_recIDs_by_date(task_get_option("modified"), task_get_option("flush")) # let us update last_updated timestamp info, if run via automatic mode: task_sleep_now_if_required(can_stop_too=True) elif task_get_option("cmd") == "repair": wordTable.repair(task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Invalid command found processing %s" % \ wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) register_exception() task_update_status("ERROR") if _last_word_table: _last_word_table.put_into_db() sys.exit(1) wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) # Let's work on phrases now if index_name in ('author', 'firstauthor'): fnc_get_phrases_from_phrase = get_fuzzy_authors_from_phrase elif index_name in ('exactauthor', 'exactfirstauthor'): fnc_get_phrases_from_phrase = get_exact_authors_from_phrase else: fnc_get_phrases_from_phrase = get_phrases_from_phrase wordTable = WordTable(index_name=index_name, index_id=index_id, fields_to_index=index_tags, table_name_pattern=reindex_prefix + 'idxPHRASE%02dF', default_get_words_fnc=fnc_get_phrases_from_phrase, tag_to_words_fnc_map={'8564_u': get_nothing_from_phrase}, wash_index_terms=0) _last_word_table = wordTable wordTable.report_on_table_consistency() try: if task_get_option("cmd") == "del": if task_get_option("id"): wordTable.del_recIDs(task_get_option("id")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.del_recIDs(recIDs_range) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Missing IDs of records to delete from " \ "index %s." 
% wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message elif task_get_option("cmd") == "add": if task_get_option("id"): wordTable.add_recIDs(task_get_option("id"), task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("collection"): l_of_colls = task_get_option("collection").split(",") recIDs = perform_request_search(c=l_of_colls) recIDs_range = [] for recID in recIDs: recIDs_range.append([recID,recID]) wordTable.add_recIDs(recIDs_range, task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: wordTable.add_recIDs_by_date(task_get_option("modified"), task_get_option("flush")) # let us update last_updated timestamp info, if run via automatic mode: update_index_last_updated(index_id, task_get_task_param('task_starting_time')) task_sleep_now_if_required(can_stop_too=True) elif task_get_option("cmd") == "repair": wordTable.repair(task_get_option("flush")) task_sleep_now_if_required(can_stop_too=True) else: error_message = "Invalid command found processing %s" % \ wordTable.tablename write_message(error_message, stream=sys.stderr) raise StandardError, error_message except StandardError, e: write_message("Exception caught: %s" % e, sys.stderr) register_exception() task_update_status("ERROR") if _last_word_table: _last_word_table.put_into_db() sys.exit(1) wordTable.report_on_table_consistency() task_sleep_now_if_required(can_stop_too=True) if task_get_option("reindex"): swap_temporary_reindex_tables(index_id, reindex_prefix) update_index_last_updated(index_id, task_get_task_param('task_starting_time')) task_sleep_now_if_required(can_stop_too=True) _last_word_table = None return True ## import optional modules: try: import psyco psyco.bind(get_words_from_phrase) psyco.bind(WordTable.merge_with_old_recIDs) except: pass ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibmerge/lib/bibmerge_engine.py b/modules/bibmerge/lib/bibmerge_engine.py index 19dee1355..e4e2ba08a 100644 --- a/modules/bibmerge/lib/bibmerge_engine.py +++ b/modules/bibmerge/lib/bibmerge_engine.py @@ -1,432 +1,432 @@ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
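(Editorial aside, not part of the patch: the three passes above populate the idxWORD, idxPAIR and idxPHRASE tables at increasingly coarse granularities. A minimal standalone sketch of the three tokenizations, with invented input, not the real bibindex tokenizers:)

# Illustrative only; not Invenio code.
def words(phrase):
    # word pass: individual lowercased tokens
    return phrase.lower().split()

def pairs(phrase):
    # pair pass: adjacent word pairs
    toks = words(phrase)
    return [a + ' ' + b for a, b in zip(toks, toks[1:])]

def phrases(phrase):
    # phrase pass: the whole phrase as a single term
    return [phrase.lower()]

# words('Higgs boson searches')   -> ['higgs', 'boson', 'searches']
# pairs('Higgs boson searches')   -> ['higgs boson', 'boson searches']
# phrases('Higgs boson searches') -> ['higgs boson searches']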
diff --git a/modules/bibmerge/lib/bibmerge_engine.py b/modules/bibmerge/lib/bibmerge_engine.py
index 19dee1355..e4e2ba08a 100644
--- a/modules/bibmerge/lib/bibmerge_engine.py
+++ b/modules/bibmerge/lib/bibmerge_engine.py
@@ -1,432 +1,432 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# pylint: disable=C0103
"""Invenio BibMerge Engine."""

import os

from invenio.bibmerge_merger import merge_field_group, replace_field, \
                                    add_field, delete_field, merge_field, \
                                    add_subfield, replace_subfield, \
                                    delete_subfield, copy_R2_to_R1, merge_record

 from invenio.search_engine import print_record, perform_request_search, \
-    get_fieldvalues
+    record_exists
+from invenio.search_engine_utils import get_fieldvalues

from invenio.bibedit_utils import cache_exists, cache_expired, \
    create_cache_file, delete_cache_file, get_cache_file_contents, \
    get_cache_mtime, latest_record_revision, record_locked_by_other_user, \
    record_locked_by_queue, save_xml_record, touch_cache_file, \
    update_cache_file_contents, _get_file_path, \
    get_record_revision_ids, revision_format_valid_p, split_revid, \
    get_marcxml_of_revision_id
from invenio.htmlutils import remove_html_markup
-from invenio.search_engine import record_exists
from invenio.bibrecord import create_record, record_xml_output, record_add_field
from invenio.bibedit_config import CFG_BIBEDIT_TO_MERGE_SUFFIX

import invenio.template
bibmerge_templates = invenio.template.load('bibmerge')

def perform_request_init():
    """Handle the initial request.
    """
    errors = []
    warnings = []
    body = ''

    # Build page structure and control panel.
    body += bibmerge_templates.controlpanel()
    body += """
    """ return body, errors, warnings def perform_request_ajax(req, uid, data): """Ajax request dispatcher.\ """ requestType = data['requestType'] if requestType in ('getRecordCompare', 'submit', 'cancel', 'recCopy', \ 'recMerge', 'recMergeNC'): return perform_request_record(requestType, uid, data) elif requestType in ('getFieldGroup', 'getFieldGroupDiff', \ 'mergeFieldGroup', 'mergeNCFieldGroup', 'replaceField', 'addField', \ 'deleteField', 'mergeField'): return perform_request_update_record(requestType, uid, data) elif requestType in ('deleteSubfield', 'addSubfield', 'replaceSubfield', \ 'diffSubfield'): return perform_small_request_update_record(requestType, uid, data) elif requestType == "searchCandidates" or requestType == "searchRevisions": return perform_candidate_record_search(requestType, data) else: return { 'resultCode': 1, 'resultText': 'Error unknown' } def perform_candidate_record_search(requestType, data): """Handle search requests. """ max_results = 999 too_many = False result = { 'resultCode': 0, 'resultText': '' } if requestType == "searchCandidates": recids = perform_request_search( p=data['query'] ) if len(recids) > max_results: too_many = True else: captions = [ search_result_info(x) for x in recids ] alternative_titles = [ remove_html_markup(print_record(x, "hs")) for x in recids ] search_results = [recids, captions, alternative_titles] elif requestType == "searchRevisions": revisions = get_record_revision_ids( data['recID1'] ) captions = [ split_revid(x, 'datetext')[1] for x in revisions ] search_results = [revisions, captions] if too_many == True: result['resultCode'] = 1 result['resultText'] = 'Too many results' else: result['results'] = search_results result['resultText'] = '%s results' % len(search_results[0]) return result def search_result_info(recid): """Return report number of a record or if it doen't exist return the recid itself. """ report_numbers = get_fieldvalues(recid, '037__a') if len(report_numbers) == 0: return "#"+str(recid) else: return report_numbers[0] def perform_request_record(requestType, uid, data): """Handle 'major' record related requests. Handle retrieving, submitting or cancelling the merging session. 
""" #TODO add checks before submission and cancel, replace get_bibrecord call result = { 'resultCode': 0, 'resultText': '' } recid1 = data["recID1"] record1 = _get_record(recid1, uid, result) if result['resultCode'] != 0: #if record not accessible return error information return result if requestType == 'submit': if data.has_key('duplicate'): recid2 = data['duplicate'] record2 = _get_record_slave(recid2, result, 'recid', uid) if result['resultCode'] != 0: #return in case of error return result # mark record2 as deleted record_add_field(record2, '980', ' ', ' ', '', [('c', 'DELETED')]) # mark record2 as duplicate of record1 record_add_field(record2, '970', ' ', ' ', '', [('d', str(recid1))]) #submit record1 xml_record1 = record_xml_output(record1) save_xml_record(recid1, uid, xml_record1) #submit record2 xml_record2 = record_xml_output(record2) save_xml_record(recid2, uid, xml_record2) result['resultText'] = 'Records submitted' return result #submit record1 save_xml_record(recid1, uid) result['resultText'] = 'Record submitted' return result elif requestType == 'cancel': delete_cache_file(recid1, uid) result['resultText'] = 'Cancelled' return result recid2 = data["recID2"] mode = data['record2Mode'] record2 = _get_record_slave(recid2, result, mode, uid) if result['resultCode'] != 0: #if record not accessible return error information return result if requestType == 'getRecordCompare': result['resultHtml'] = bibmerge_templates.BM_html_all_diff(record1, record2) result['resultText'] = 'Records compared' elif requestType == 'recCopy': copy_R2_to_R1(record1, record2) result['resultHtml'] = bibmerge_templates.BM_html_all_diff(record1, record2) result['resultText'] = 'Record copied' elif requestType == 'recMerge': merge_record(record1, record2, merge_conflicting_fields=True) result['resultHtml'] = bibmerge_templates.BM_html_all_diff(record1, record2) result['resultText'] = 'Records merged' elif requestType == 'recMergeNC': merge_record(record1, record2, merge_conflicting_fields=False) result['resultHtml'] = bibmerge_templates.BM_html_all_diff(record1, record2) result['resultText'] = 'Records merged' else: result['resultCode'], result['resultText'] = 1, 'Wrong request type' return result def perform_request_update_record(requestType, uid, data): """Handle record update requests for actions on a field level. Handle merging, adding, or replacing of fields. """ result = { 'resultCode': 0, 'resultText': '' } recid1 = data["recID1"] recid2 = data["recID2"] record_content = get_cache_file_contents(recid1, uid) cache_dirty = record_content[0] rec_revision = record_content[1] record1 = record_content[2] pending_changes = record_content[3] disabled_hp_changes = record_content[4] # We will not be able to Undo/Redo correctly after any modifications # from the level of bibmerge are performed ! 
def perform_request_update_record(requestType, uid, data):
    """Handle record update requests for actions on a field level.
    Handle merging, adding, or replacing of fields.
    """
    result = {
        'resultCode': 0,
        'resultText': ''
        }
    recid1 = data["recID1"]
    recid2 = data["recID2"]
    record_content = get_cache_file_contents(recid1, uid)
    cache_dirty = record_content[0]
    rec_revision = record_content[1]
    record1 = record_content[2]
    pending_changes = record_content[3]
    disabled_hp_changes = record_content[4]
    # We will not be able to Undo/Redo correctly after any modifications
    # from the level of bibmerge are performed!  We clear all the undo/redo
    # lists.
    undo_list = []
    redo_list = []

    mode = data['record2Mode']
    record2 = _get_record_slave(recid2, result, mode, uid)
    if result['resultCode'] != 0: #if record not accessible return error information
        return result

    if requestType == 'getFieldGroup':
        result['resultHtml'] = bibmerge_templates.BM_html_field_group(record1, record2, data['fieldTag'])
        result['resultText'] = 'Field group retrieved'
        return result
    elif requestType == 'getFieldGroupDiff':
        result['resultHtml'] = bibmerge_templates.BM_html_field_group(record1, record2, data['fieldTag'], True)
        result['resultText'] = 'Fields compared'
        return result
    elif requestType == 'mergeFieldGroup' or requestType == 'mergeNCFieldGroup':
        fnum, ind1, ind2 = _fieldtagNum_and_indicators(data['fieldTag'])
        if requestType == 'mergeNCFieldGroup':
            merge_field_group(record1, record2, fnum, ind1, ind2, False)
        else:
            merge_field_group(record1, record2, fnum, ind1, ind2, True)
        resultText = 'Field group merged'

    elif requestType == 'replaceField' or requestType == 'addField':
        fnum, ind1, ind2 = _fieldtagNum_and_indicators(data['fieldTag'])
        findex1 = _field_info( data['fieldCode1'] )[1]
        findex2 = _field_info( data['fieldCode2'] )[1]
        if findex2 == None:
            result['resultCode'], result['resultText'] = 1, 'No value in the selected field'
            return result
        if requestType == 'replaceField':
            replace_field(record1, record2, fnum, findex1, findex2)
            resultText = 'Field replaced'
        else: # requestType == 'addField'
            add_field(record1, record2, fnum, findex1, findex2)
            resultText = 'Field added'

    elif requestType == 'deleteField':
        fnum, ind1, ind2 = _fieldtagNum_and_indicators(data['fieldTag'])
        findex1 = _field_info( data['fieldCode1'] )[1]
        if findex1 == None:
            result['resultCode'], result['resultText'] = 1, 'No value in the selected field'
            return result
        delete_field(record1, fnum, findex1)
        resultText = 'Field deleted'

    elif requestType == 'mergeField':
        fnum, ind1, ind2 = _fieldtagNum_and_indicators(data['fieldTag'])
        findex1 = _field_info( data['fieldCode1'] )[1]
        findex2 = _field_info( data['fieldCode2'] )[1]
        if findex2 == None:
            result['resultCode'], result['resultText'] = 1, 'No value in the selected field'
            return result
        merge_field(record1, record2, fnum, findex1, findex2)
        resultText = 'Field merged'

    else:
        result['resultCode'], result['resultText'] = 1, 'Wrong request type'
        return result

    result['resultHtml'] = bibmerge_templates.BM_html_field_group(record1, record2, data['fieldTag'])
    result['resultText'] = resultText
    update_cache_file_contents(recid1, uid, rec_revision, record1, pending_changes, disabled_hp_changes, undo_list, redo_list)
    return result
""" result = { 'resultCode': 0, 'resultText': '', 'resultHtml': '' } recid1 = data["recID1"] recid2 = data["recID2"] cache_content = get_cache_file_contents(recid1, uid) #TODO: check mtime, existence cache_dirty = cache_content[0] rec_revision = cache_content[1] record1 = cache_content[2] pending_changes = cache_content[3] disabled_hp_changes = cache_content[4] mode = data['record2Mode'] record2 = _get_record_slave(recid2, result, mode, uid) if result['resultCode'] != 0: #if record not accessible return error information return result ftag, findex1 = _field_info(data['fieldCode1']) fnum = ftag[:3] findex2 = _field_info(data['fieldCode2'])[1] sfindex1 = data['sfindex1'] sfindex2 = data['sfindex2'] if requestType == 'deleteSubfield': delete_subfield(record1, fnum, findex1, sfindex1) result['resultText'] = 'Subfield deleted' elif requestType == 'addSubfield': add_subfield(record1, record2, fnum, findex1, findex2, sfindex1, sfindex2) result['resultText'] = 'Subfield added' elif requestType == 'replaceSubfield': replace_subfield(record1, record2, fnum, findex1, findex2, sfindex1, sfindex2) result['resultText'] = 'Subfield replaced' elif requestType == 'diffSubfield': result['resultHtml'] = bibmerge_templates.BM_html_subfield_row_diffed(record1, record2, fnum, findex1, findex2, sfindex1, sfindex2) result['resultText'] = 'Subfields diffed' update_cache_file_contents(recid1, uid, rec_revision, record1, pending_changes, disabled_hp_changes, [], []) return result def _get_record(recid, uid, result, fresh_record=False): """Retrieve record structure. """ record = None mtime = None cache_dirty = None record_status = record_exists(recid) existing_cache = cache_exists(recid, uid) if record_status == 0: result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid elif record_status == -1: result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid elif not existing_cache and record_locked_by_other_user(recid, uid): result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid elif existing_cache and cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid elif record_locked_by_queue(recid): result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid else: if fresh_record: delete_cache_file(recid, uid) existing_cache = False if not existing_cache: record_revision, record = create_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) cache_dirty = False else: tmpRes = get_cache_file_contents(recid, uid) cache_dirty, record_revision, record = tmpRes[0], tmpRes[1], tmpRes[2] touch_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) if not latest_record_revision(recid, record_revision): result['cacheOutdated'] = True result['resultCode'], result['resultText'], result['cacheDirty'], result['cacheMTime'] = 0, 'Record OK', cache_dirty, mtime return record def _get_record_slave(recid, result, mode=None, uid=None): """Check if record exists and return it in dictionary format. If any kind of error occurs returns None. 
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record( print_record(recid, 'xm') )[0]

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path): #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file does not exist'
        else: #open file
            tmpfile = open(file_path, 'r')
            record = create_record( tmpfile.read() )[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record

def _field_info(fieldIdCode):
    """Returns a tuple: (field-tag, field-index)
        eg.: _field_info('R1-8560_-2') --> ('8560_', 2) """
    info = fieldIdCode.split('-')
    if info[2] == 'None':
        info[2] = None
    else:
        info[2] = int(info[2])
    return tuple( info[1:] )

def _fieldtagNum_and_indicators(fieldTag):
    """Separate a 5-char field tag to a 3-character field-tag number and two
    indicators"""
    fnum, ind1, ind2 = fieldTag[:3], fieldTag[3], fieldTag[4]
    if ind1 == '_':
        ind1 = ' '
    if ind2 == '_':
        ind2 = ' '
    return (fnum, ind1, ind2)
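(Editorial aside, not part of the patch: for reference, the two helpers above behave as follows; values are invented:)

# _field_info parses a field code of the form '<record>-<tag><indicators>-<index>':
_field_info('R1-8560_-2')     # -> ('8560_', 2)
_field_info('R2-100__-None')  # -> ('100__', None)

# _fieldtagNum_and_indicators splits a 5-char tag, mapping '_' to ' ':
_fieldtagNum_and_indicators('8560_')  # -> ('856', '0', ' ')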
__revision__ = "$Id$" import re import time import sys import os import zlib if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.dbquery import run_sql, serialize_via_marshal, \ deserialize_via_marshal -from invenio.search_engine import search_pattern, get_fieldvalues, \ - search_unit +from invenio.search_engine import search_pattern, search_unit +from invenio.search_engine_utils import get_fieldvalues from invenio.bibformat_utils import parse_tag from invenio.bibtask import write_message, task_get_option, \ task_update_progress, task_sleep_now_if_required, \ task_get_task_param from invenio.errorlib import register_exception class memoise: def __init__(self, function): self.memo = {} self.function = function def __call__(self, *args): if self.memo.has_key(args): return self.memo[args] else: object = self.memo[args] = self.function(*args) return object def get_recids_matching_query(pvalue, fvalue): """Return list of recIDs matching query for PVALUE and FVALUE.""" rec_id = list(search_pattern(p=pvalue, f=fvalue, m='e')) return rec_id get_recids_matching_query = memoise(get_recids_matching_query) def get_citation_weight(rank_method_code, config): """return a dictionary which is used by bibrank daemon for generating the index of sorted research results by citation information """ begin_time = time.time() last_update_time = get_bibrankmethod_lastupdate(rank_method_code) if task_get_option("quick") == "no": last_update_time = "0000-00-00 00:00:00" write_message("running thorough indexing since quick option not used", verbose=3) last_modified_records = get_last_modified_rec(last_update_time) #id option forces re-indexing a certain range even if there are no new recs if last_modified_records or task_get_option("id"): if task_get_option("id"): #construct a range of records to index taskid = task_get_option("id") first = taskid[0][0] last = taskid[0][1] #make range, last+1 so that e.g. -i 1-2 really means [1,2] not [1] updated_recid_list = range(first, last+1) else: updated_recid_list = create_recordid_list(last_modified_records) write_message("Last update "+str(last_update_time)+" records: "+ \ str(len(last_modified_records))+" updates: "+ \ str(len(updated_recid_list))) #write_message("updated_recid_list: "+str(updated_recid_list)) result_intermediate = last_updated_result(rank_method_code) #result_intermed should be warranted to exists! #but if the user entered a "-R" (do all) option, we need to #make an empty start set if task_get_option("quick") == "no": result_intermediate = [{}, {}, {}] citation_weight_dic_intermediate = result_intermediate[0] citation_list_intermediate = result_intermediate[1] reference_list_intermediate = result_intermediate[2] #call the procedure that does the hard work by reading fields of #citations and references in the updated_recid's (but nothing else)! if task_get_task_param('verbose') >= 9: write_message("Entering get_citation_informations") citation_informations = get_citation_informations(updated_recid_list, config) #write_message("citation_informations: "+str(citation_informations)) #create_analysis_tables() #temporary.. #test how much faster in-mem indexing is write_message("Entering ref_analyzer", verbose=9) #call the analyser that uses the citation_informations to really #search x-cites-y in the coll.. 
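# Editorial sketch, not part of the module: how the memoise wrapper above
# behaves.  Results are cached per positional-argument tuple, so repeated
# lookups with the same (pvalue, fvalue) pair hit the cache:
#
#   def slow_lookup(x):
#       return expensive_search(x)     # hypothetical helper
#   slow_lookup = memoise(slow_lookup)
#   slow_lookup('hep-ph/0501084')      # computed, stored under key ('hep-ph/0501084',)
#   slow_lookup('hep-ph/0501084')      # returned from self.memo, no second search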
def get_citation_weight(rank_method_code, config):
    """return a dictionary which is used by bibrank daemon for generating
    the index of sorted research results by citation information
    """
    begin_time = time.time()
    last_update_time = get_bibrankmethod_lastupdate(rank_method_code)

    if task_get_option("quick") == "no":
        last_update_time = "0000-00-00 00:00:00"
        write_message("running thorough indexing since quick option not used", verbose=3)

    last_modified_records = get_last_modified_rec(last_update_time)
    #id option forces re-indexing a certain range even if there are no new recs
    if last_modified_records or task_get_option("id"):
        if task_get_option("id"):
            #construct a range of records to index
            taskid = task_get_option("id")
            first = taskid[0][0]
            last = taskid[0][1]
            #make range, last+1 so that e.g. -i 1-2 really means [1,2] not [1]
            updated_recid_list = range(first, last+1)
        else:
            updated_recid_list = create_recordid_list(last_modified_records)

        write_message("Last update "+str(last_update_time)+" records: "+ \
                      str(len(last_modified_records))+" updates: "+ \
                      str(len(updated_recid_list)))
        #write_message("updated_recid_list: "+str(updated_recid_list))
        result_intermediate = last_updated_result(rank_method_code)

        #result_intermediate should be guaranteed to exist!
        #but if the user entered a "-R" (do all) option, we need to
        #make an empty start set
        if task_get_option("quick") == "no":
            result_intermediate = [{}, {}, {}]

        citation_weight_dic_intermediate = result_intermediate[0]
        citation_list_intermediate = result_intermediate[1]
        reference_list_intermediate = result_intermediate[2]

        #call the procedure that does the hard work by reading fields of
        #citations and references in the updated_recid's (but nothing else)!
        if task_get_task_param('verbose') >= 9:
            write_message("Entering get_citation_informations")
        citation_informations = get_citation_informations(updated_recid_list, config)
        #write_message("citation_informations: "+str(citation_informations))
        #create_analysis_tables() #temporary.. #test how much faster in-mem indexing is
        write_message("Entering ref_analyzer", verbose=9)
        #call the analyser that uses the citation_informations to really
        #search x-cites-y in the coll..
        dic = ref_analyzer(citation_informations,
                           citation_weight_dic_intermediate,
                           citation_list_intermediate,
                           reference_list_intermediate,
                           config, updated_recid_list)
        #dic is docid-numberofreferences like {1: 2, 2: 0, 3: 1}
        #write_message("Docid-number of known references "+str(dic))
        end_time = time.time()
        write_message("Total time of get_citation_weight(): %.2f sec" % (end_time - begin_time))
        task_update_progress("citation analysis done")
    else:
        dic = {}
        write_message("No new records added since last time this rank method was executed")

    return dic

def get_bibrankmethod_lastupdate(rank_method_code):
    """return the last execution date of bibrank method
    """
    query = """select last_updated from rnkMETHOD where name ='%s'""" % rank_method_code
    last_update_time = run_sql(query)
    r = last_update_time[0][0]
    if r is None:
        return "0000-00-00 00:00:00"
    return r

def get_last_modified_rec(bibrank_method_lastupdate):
    """ return the list of records which have been modified after the last
    execution of the bibrank method. The result is expected to have ascending
    numerical order.
    """
    query = """SELECT id FROM bibrec WHERE modification_date >= '%s' """ % bibrank_method_lastupdate
    query += "order by id ASC"
    ilist = run_sql(query)
    return ilist

def create_recordid_list(rec_ids):
    """Create a list of record ids out of RECIDS.
       The result is expected to have ascending numerical order.
    """
    rec_list = []
    for row in rec_ids:
        rec_list.append(row[0])
    return rec_list

def create_record_tuple(ilist):
    """Creates a tuple of record id from a list of id.
       The result is expected to have ascending numerical order.
    """
    list_length = len(ilist)
    if list_length:
        rec_tuple = '('
        for row in ilist[0:list_length-1]:  # NOTE: was `list[...]`, shadowing the builtin; fixed to `ilist`
            rec_tuple += str(row)
            rec_tuple += ','
        rec_tuple += str(ilist[list_length-1])  # NOTE: same fix as above
        rec_tuple += ')'
    else:
        rec_tuple = '()'
    return rec_tuple
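# Editorial examples of the helper above (not in the original):
#   create_record_tuple([1, 2, 3]) -> '(1,2,3)'
#   create_record_tuple([7])       -> '(7)'
#   create_record_tuple([])        -> '()'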
""" begin_time = os.times()[4] d_reports_numbers = {} #dict of recid -> institute-given-report-code d_references_report_numbers = {} #dict of recid -> ['astro-ph/xyz'] d_references_s = {} #dict of recid -> list_of_the_entries_of_this_recs_bibliography d_records_s = {} #dict of recid -> this_records_publication_info citation_informations = [] write_message("config function "+config.get("rank_method", "function"), verbose=9) function = "" try: function = config.get("rank_method", "function") except: register_exception(prefix="cfg section [rank_method] has no attribute called function", alert_admin=True) #we cannot continue return [ {}, {}, {}, {} ] record_pri_number_tag = "" try: record_pri_number_tag = config.get(function, "primary_report_number") except: register_exception(prefix="cfg section "+function+" has no attribute primary_report_number", alert_admin=True) return [ {}, {}, {}, {} ] record_add_number_tag = "" try: record_add_number_tag = config.get(config.get("rank_method", "function"), "additional_report_number") except: register_exception(prefix="config error. cfg section "+function+" has no attribute additional_report_number", alert_admin=True) return [ {}, {}, {}, {} ] reference_number_tag = "" try: reference_number_tag = config.get(config.get("rank_method", "function"), "reference_via_report_number") except: register_exception(prefix="config error. cfg section "+function+" has no attribute reference_via_report_number", alert_admin=True) return [ {}, {}, {}, {} ] reference_tag = "" try: reference_tag = config.get(config.get("rank_method", "function"), "reference_via_pubinfo") except: register_exception(prefix="config error. cfg section "+function+" has no attribute reference_via_pubinfo", alert_admin=True) return [ {}, {}, {}, {} ] p_record_pri_number_tag = tagify(parse_tag(record_pri_number_tag)) #037a: contains (often) the "hep-ph/0501084" tag of THIS record p_record_add_number_tag = tagify(parse_tag(record_add_number_tag)) #088a: additional short identifier for the record p_reference_number_tag = tagify(parse_tag(reference_number_tag)) #999C5r. this is in the reference list, refers to other records. Looks like: hep-ph/0408002 p_reference_tag = tagify(parse_tag(reference_tag)) #999C5s. A standardized way of writing a reference in the reference list. Like: Nucl. Phys. 
B 710 (2000) 371 #fields needed to construct the pubinfo for this record publication_pages_tag = "" publication_year_tag = "" publication_journal_tag = "" publication_volume_tag = "" publication_format_string = "p v (y) c" try: tag = config.get(function, "pubinfo_journal_page") publication_pages_tag = tagify(parse_tag(tag)) tag = config.get(function, "pubinfo_journal_year") publication_year_tag = tagify(parse_tag(tag)) tag = config.get(function, "pubinfo_journal_title") publication_journal_tag = tagify(parse_tag(tag)) tag = config.get(function, "pubinfo_journal_volume") publication_volume_tag = tagify(parse_tag(tag)) publication_format_string = config.get(function, "pubinfo_journal_format") except: pass #print values for tags for debugging if task_get_task_param('verbose') >= 9: write_message("tag values") write_message("p_record_pri_number_tag "+str(p_record_pri_number_tag)) write_message("p_reference_tag "+str(p_reference_tag)) write_message("publication_journal_tag "+str(publication_journal_tag)) write_message("publication_format_string is "+publication_format_string) done = 0 #for status reporting numrecs = len(recid_list) # perform quick check to see if there are some records with # reference tags, because otherwise get.cit.inf would be slow even # if there is nothing to index: if run_sql("SELECT value FROM bib%sx WHERE tag=%%s LIMIT 1" % p_reference_tag[0:2], (p_reference_tag,)) or \ run_sql("SELECT value FROM bib%sx WHERE tag=%%s LIMIT 1" % p_reference_number_tag[0:2], (p_reference_number_tag,)): for recid in recid_list: if (done % 10 == 0): task_sleep_now_if_required() #in fact we can sleep any time here if (done % 1000 == 0): mesg = "get cit.inf done "+str(done)+" of "+str(numrecs) write_message(mesg) task_update_progress(mesg) done = done+1 pri_report_numbers = get_fieldvalues(recid, p_record_pri_number_tag) add_report_numbers = get_fieldvalues(recid, p_record_add_number_tag) reference_report_numbers = get_fieldvalues(recid, p_reference_number_tag) references_s = get_fieldvalues(recid, p_reference_tag) l_report_numbers = pri_report_numbers l_report_numbers.extend(add_report_numbers) d_reports_numbers[recid] = l_report_numbers if reference_report_numbers: d_references_report_numbers[recid] = reference_report_numbers references_s = get_fieldvalues(recid, p_reference_tag) write_message(str(recid)+"'s "+str(p_reference_tag)+" values "+str(references_s), verbose=9) if references_s: d_references_s[recid] = references_s #get a combination of #journal vol (year) pages if publication_pages_tag and publication_journal_tag and \ publication_volume_tag and publication_year_tag and publication_format_string: tagsvalues = {} #we store the tags and their values here #like c->444 y->1999 p->"journal of foo",v->20 tagsvalues["p"] = "" tagsvalues["y"] = "" tagsvalues["c"] = "" tagsvalues["v"] = "" tmp = get_fieldvalues(recid, publication_journal_tag) if tmp: tagsvalues["p"] = tmp[0] tmp = get_fieldvalues(recid, publication_volume_tag) if tmp: tagsvalues["v"] = tmp[0] tmp = get_fieldvalues(recid, publication_year_tag) if tmp: tagsvalues["y"] = tmp[0] tmp = get_fieldvalues(recid, publication_pages_tag) if tmp: #if the page numbers have "x-y" take just x pages = tmp[0] hpos = pages.find("-") if hpos > 0: pages = pages[:hpos] tagsvalues["c"] = pages #format the publ infostring according to the format publ = "" ok = 1 for i in range (0, len(publication_format_string)): current = publication_format_string[i] #these are supported if current == "p" or current == "c" or current == "v" \ or current == "y": 
if tagsvalues[current]: #add the value in the string publ += tagsvalues[current] else: ok = 0 break #it was needed and not found else: publ += current #just add the character in the format string if ok: write_message("d_records_s (publication info) for "+str(recid)+" is "+publ, verbose=9) d_records_s[recid] = publ else: mesg = "Warning: there are no records with tag values for " mesg += p_reference_number_tag+" or "+p_reference_tag+". Nothing to do." write_message(mesg) mesg = "get cit.inf done fully" write_message(mesg) task_update_progress(mesg) citation_informations.append(d_reports_numbers) citation_informations.append(d_references_report_numbers) citation_informations.append(d_references_s) citation_informations.append(d_records_s) end_time = os.times()[4] write_message("Execution time for generating citation info from record: %.2f sec" % \ (end_time - begin_time)) return citation_informations def get_self_citations(new_record_list, citationdic, initial_selfcitdict, config): """Check which items have been cited by one of the authors of the citing item: go through id's in new_record_list, use citationdic to get citations, update "selfcites". Selfcites is originally initial_selfcitdict. Return selfcites. """ i = 0 #just for debugging .. #get the tags for main author, coauthors, ext authors from config tags = ['first_author', 'additional_author', 'alternative_author_name'] for t in tags: try: dummy = config.get(config.get("rank_method", "function"), t) except: register_exception(prefix="attribute "+t+" missing in config", alert_admin=True) return initial_selfcitdict r_mainauthortag = config.get(config.get("rank_method", "function"), "first_author") r_coauthortag = config.get(config.get("rank_method", "function"), "additional_author") r_extauthortag = config.get(config.get("rank_method", "function"), "alternative_author_name") #parse the tags mainauthortag = tagify(parse_tag(r_mainauthortag)) coauthortag = tagify(parse_tag(r_coauthortag)) extauthortag = tagify(parse_tag(r_extauthortag)) selfcites = initial_selfcitdict for k in new_record_list: if (i % 1000 == 0): mesg = "Selfcites done "+str(i)+" of "+str(len(new_record_list))+" records" write_message(mesg) task_update_progress(mesg) i = i+1 #get the author of k authorlist = get_fieldvalues(k, mainauthortag) coauthl = get_fieldvalues(k, coauthortag) extauthl = get_fieldvalues(k, extauthortag) authorlist.append(coauthl) authorlist.append(extauthl) #author tag #print "record "+str(k)+" by "+str(authorlist) #print "is cited by" #get the "x-cites-this" list if citationdic.has_key(k): xct = citationdic[k] for c in xct: #get authors of c cauthorlist = get_fieldvalues(c, mainauthortag) coauthl = get_fieldvalues(c, coauthortag) extauthl = get_fieldvalues(c, extauthortag) cauthorlist.extend(coauthl) cauthorlist.extend(extauthl) #print str(c)+" by "+str(cauthorlist) for ca in cauthorlist: if (ca in authorlist): #found! if selfcites.has_key(k): val = selfcites[k] #add only if not there already if val: if not c in val: val.append(c) selfcites[k] = val else: #new key for selfcites selfcites[k] = [c] mesg = "Selfcites done fully" write_message(mesg) task_update_progress(mesg) return selfcites def get_author_citations(updated_redic_list, citedbydict, initial_author_dict, config): """Traverses citedbydict in order to build "which author is quoted where" dict. The keys of this are author names. An entry like "Apollinaire"->[1,2,3] means Apollinaire is cited in records 1,2 and 3. 
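# Editorial sketch of the "pubinfo_journal_format" assembly above: each of
# the letters p (journal), v (volume), y (year), c (first page) is replaced
# by its tag value, any other character is copied through, and a missing
# needed value aborts the pubinfo.  Standalone illustration, not used here:
def _format_pubinfo_sketch(fmt, vals):
    publ = ""
    for current in fmt:
        if current in ("p", "v", "y", "c"):
            if not vals.get(current):
                return None  # needed value missing -> no pubinfo
            publ += vals[current]
        else:
            publ += current
    return publ
# _format_pubinfo_sketch("p v (y) c", {'p': 'Phys. Rev. Lett.', 'v': '96',
#                                      'y': '2006', 'c': '081301'})
# -> 'Phys. Rev. Lett. 96 (2006) 081301'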
def get_self_citations(new_record_list, citationdic, initial_selfcitdict, config):
    """Check which items have been cited by one of the authors of the
       citing item: go through id's in new_record_list, use citationdic to get
       citations, update "selfcites". Selfcites is originally initial_selfcitdict.
       Return selfcites.
    """
    i = 0 #just for debugging ..
    #get the tags for main author, coauthors, ext authors from config
    tags = ['first_author', 'additional_author', 'alternative_author_name']
    for t in tags:
        try:
            dummy = config.get(config.get("rank_method", "function"), t)
        except:
            register_exception(prefix="attribute "+t+" missing in config", alert_admin=True)
            return initial_selfcitdict

    r_mainauthortag = config.get(config.get("rank_method", "function"), "first_author")
    r_coauthortag = config.get(config.get("rank_method", "function"), "additional_author")
    r_extauthortag = config.get(config.get("rank_method", "function"), "alternative_author_name")
    #parse the tags
    mainauthortag = tagify(parse_tag(r_mainauthortag))
    coauthortag = tagify(parse_tag(r_coauthortag))
    extauthortag = tagify(parse_tag(r_extauthortag))

    selfcites = initial_selfcitdict
    for k in new_record_list:
        if (i % 1000 == 0):
            mesg = "Selfcites done "+str(i)+" of "+str(len(new_record_list))+" records"
            write_message(mesg)
            task_update_progress(mesg)
        i = i+1
        #get the authors of k
        authorlist = get_fieldvalues(k, mainauthortag)
        coauthl = get_fieldvalues(k, coauthortag)
        extauthl = get_fieldvalues(k, extauthortag)
        authorlist.extend(coauthl)  # NOTE: was `append`, which nests lists and breaks the membership test below
        authorlist.extend(extauthl)
        #author tag
        #print "record "+str(k)+" by "+str(authorlist)
        #print "is cited by"
        #get the "x-cites-this" list
        if citationdic.has_key(k):
            xct = citationdic[k]
            for c in xct:
                #get authors of c
                cauthorlist = get_fieldvalues(c, mainauthortag)
                coauthl = get_fieldvalues(c, coauthortag)
                extauthl = get_fieldvalues(c, extauthortag)
                cauthorlist.extend(coauthl)
                cauthorlist.extend(extauthl)
                #print str(c)+" by "+str(cauthorlist)
                for ca in cauthorlist:
                    if (ca in authorlist):
                        #found!
                        if selfcites.has_key(k):
                            val = selfcites[k]
                            #add only if not there already
                            if val:
                                if not c in val:
                                    val.append(c)
                            selfcites[k] = val
                        else:
                            #new key for selfcites
                            selfcites[k] = [c]

    mesg = "Selfcites done fully"
    write_message(mesg)
    task_update_progress(mesg)

    return selfcites

def get_author_citations(updated_redic_list, citedbydict, initial_author_dict, config):
    """Traverses citedbydict in order to build "which author is quoted where" dict.
       The keys of this are author names. An entry like "Apollinaire"->[1,2,3] means
       Apollinaire is cited in records 1, 2 and 3.
       Input: citedbydict, updated_redic_list = records to be searched,
       initial_author_dict: the dicts from the database.
       Output: authorciteddict. It is initially set to initial_author_dict
    """
    #sorry about repeated code to get the tags
    tags = ['first_author', 'additional_author', 'alternative_author_name']
    tagvals = {}
    for t in tags:
        try:
            x = config.get(config.get("rank_method", "function"), t)
            tagvals[t] = x
        except:
            register_exception(prefix="attribute "+t+" missing in config", alert_admin=True)
            return initial_author_dict

    #parse the tags
    mainauthortag = tagify(parse_tag(tagvals['first_author']))
    coauthortag = tagify(parse_tag(tagvals['additional_author']))
    extauthortag = tagify(parse_tag(tagvals['alternative_author_name']))
    if task_get_task_param('verbose') >= 9:
        write_message("mainauthortag "+mainauthortag)
        write_message("coauthortag "+coauthortag)
        write_message("extauthortag "+extauthortag)

    author_cited_in = initial_author_dict
    if citedbydict:
        i = 0 #just a counter for debug
        write_message("Checking records referred to in new records")
        for u in updated_redic_list:
            if (i % 1000 == 0):
                mesg = "Author ref done "+str(i)+" of "+str(len(updated_redic_list))+" records"
                write_message(mesg)
                task_update_progress(mesg)
            i = i + 1

            if citedbydict.has_key(u):
                these_cite_k = citedbydict[u]
                if (these_cite_k is None):
                    these_cite_k = [] #verify it is an empty list, not None
                authors = get_fieldvalues(u, mainauthortag)
                coauthl = get_fieldvalues(u, coauthortag)
                extauthl = get_fieldvalues(u, extauthortag)
                authors.extend(coauthl)
                authors.extend(extauthl)
                for a in authors:
                    if a and author_cited_in.has_key(a):
                        #add all elements in these_cite_k
                        #that are not there already
                        for citer in these_cite_k:
                            tmplist = author_cited_in[a]
                            if (tmplist.count(citer) == 0):
                                tmplist.append(citer)
                            author_cited_in[a] = tmplist
                    else:
                        author_cited_in[a] = these_cite_k

        mesg = "Author ref done fully"
        write_message(mesg)
        task_update_progress(mesg)

        #go through the dictionary again: all keys but search only if new records are cited
        write_message("Checking authors in new records")
        i = 0
        for k in citedbydict.keys():
            if (i % 1000 == 0):
                mesg = "Author cit done "+str(i)+" of "+str(len(citedbydict.keys()))+" records"
                write_message(mesg)
                task_update_progress(mesg)
            i = i + 1

            these_cite_k = citedbydict[k]
            if (these_cite_k is None):
                these_cite_k = [] #verify it is an empty list, not None
            #do things only if these_cite_k contains any new stuff
            intersec_list = list(set(these_cite_k)&set(updated_redic_list))
            if intersec_list:
                authors = get_fieldvalues(k, mainauthortag)
                coauthl = get_fieldvalues(k, coauthortag)
                extauthl = get_fieldvalues(k, extauthortag)
                authors.extend(coauthl)
                authors.extend(extauthl)
                for a in authors:
                    if a and author_cited_in.has_key(a):
                        #add all elements in these_cite_k
                        #that are not there already
                        for citer in these_cite_k:
                            tmplist = author_cited_in[a]
                            if (tmplist.count(citer) == 0):
                                tmplist.append(citer)
                            author_cited_in[a] = tmplist
                    else:
                        author_cited_in[a] = these_cite_k

    mesg = "Author cit done fully"
    write_message(mesg)
    task_update_progress(mesg)

    return author_cited_in
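# Editorial sketch: the core test applied by get_self_citations() above, in
# isolation.  A citation c -> k is a self-citation when the author name sets
# of the two records intersect:
def _is_self_citation_sketch(authors_of_cited, authors_of_citer):
    return bool(set(authors_of_cited) & set(authors_of_citer))
# _is_self_citation_sketch(['Ellis, J.', 'Ross, G.'], ['Ross, G.']) -> True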
""" function = "" try: function = config.get("rank_method", "function") except: register_exception(prefix="cfg section [rank_method] has no attr function", alert_admin=True) return {} pubrefntag = "" try: pubrefntag = config.get(function, "reference_via_report_number") except: register_exception(prefix="cfg section "+function+" has no attr reference_via_report_number", alert_admin=True) return {} pubreftag = "" try: pubreftag = config.get(function, "reference_via_pubinfo") except: register_exception(prefix="cfg section "+function+" has no attr reference_via_pubinfo", alert_admin=True) return {} #pubrefntag is often 999C5r, pubreftag 999C5s if task_get_task_param('verbose') >= 9: write_message("pubrefntag "+pubrefntag) write_message("pubreftag "+pubreftag) citation_list = initial_citationlist reference_list = initial_referencelist result = initialresult d_reports_numbers = citation_informations[0] #dict of recid -> institute_give_publ_id d_references_report_numbers = citation_informations[1] #dict of recid -> ['astro-ph/xyz'..] d_references_s = citation_informations[2] #dict of recid -> publication_infos_in_its_bibliography d_records_s = citation_informations[3] #recid -> its publication inf t1 = os.times()[4] write_message("Phase 1: d_references_report_numbers") #d_references_report_numbers: e.g 8 -> ([astro-ph/9889],[hep-ph/768]) #meaning: rec 8 contains these in bibliography done = 0 numrecs = len(d_references_report_numbers) for thisrecid, refnumbers in d_references_report_numbers.iteritems(): if (done % 1000 == 0): mesg = "d_references_report_numbers done "+str(done)+" of "+str(numrecs) write_message(mesg) task_update_progress(mesg) #write to db! insert_into_cit_db(reference_list, "reversedict") insert_into_cit_db(citation_list, "citationdict") #it's ok to sleep too, we got something done task_sleep_now_if_required() done = done+1 for refnumber in refnumbers: if refnumber: p = refnumber f = 'reportnumber' #sanitise p p.replace("\n",'') #search for "hep-th/5644654 or such" in existing records rec_ids = get_recids_matching_query(p, f) if rec_ids and rec_ids[0]: write_citer_cited(thisrecid, rec_ids[0]) remove_from_missing(p) if not result.has_key(rec_ids[0]): result[rec_ids[0]] = 0 # Citation list should have rec_ids[0] but check anyway if not citation_list.has_key(rec_ids[0]): citation_list[rec_ids[0]] = [] #append unless this key already has the item if not thisrecid in citation_list[rec_ids[0]]: citation_list[rec_ids[0]].append(thisrecid) #and update result result[rec_ids[0]] += 1 if not reference_list.has_key(thisrecid): reference_list[thisrecid] = [] if not rec_ids[0] in reference_list[thisrecid]: reference_list[thisrecid].append(rec_ids[0]) else: #the reference we wanted was not found among our records. #put the reference in the "missing".. however, it will look #bad.. gfhgf/1254312, so get the corresponding 999C5s (full ref) too #This should really be done in the next loop d_references_s #but the 999C5s fields are not yet normalized #rectext = print_record(thisrecid, format='hm', ot=pubreftag[:-1]) rectext = "" # print_record() call disabled to speed things up lines = rectext.split("\n") rpart = p #to be used.. for l in lines: if (l.find(p) > 0): #the gfhgf/1254312 was found.. 
get the s-part of it st = l.find('$s') if (st > 0): end = l.find('$', st) if (end == st): end = len(l) rpart = l[st+2:end] insert_into_missing(thisrecid, rpart) mesg = "d_references_report_numbers done fully" write_message(mesg) task_update_progress(mesg) t2 = os.times()[4] #try to find references based on 999C5s, like Phys.Rev.Lett. 53 (1986) 2285 write_message("Phase 2: d_references_s") done = 0 numrecs = len(d_references_s) for thisrecid, refss in d_references_s.iteritems(): if (done % 1000 == 0): mesg = "d_references_s done "+str(done)+" of "+str(numrecs) write_message(mesg) task_update_progress(mesg) #write to db! insert_into_cit_db(reference_list, "reversedict") insert_into_cit_db(citation_list, "citationdict") task_sleep_now_if_required() done = done+1 for refs in refss: if refs: p = refs #remove the latter page number if it is like 67-74 matches = re.compile("(.*)(-\d+$)").findall(p) if matches and matches[0]: p = matches[0][0] rec_id = None try: rec_ids = list(search_unit(p, 'journal')) except: rec_ids = None write_message("These match searching "+p+" in journal: "+str(rec_id), verbose=9) if rec_ids and rec_ids[0]: #the refered publication is in our collection, remove #from missing remove_from_missing(p) else: #it was not found so add in missing insert_into_missing(thisrecid, p) #check citation and reference for this.. if rec_ids and rec_ids[0]: #the above should always hold if not result.has_key(rec_ids[0]): result[rec_ids[0]] = 0 if not citation_list.has_key(rec_ids[0]): citation_list[rec_ids[0]] = [] if not thisrecid in citation_list[rec_ids[0]]: citation_list[rec_ids[0]].append(thisrecid) #append actual list result[rec_ids[0]] += 1 #add count for this.. #update reference_list accordingly if not reference_list.has_key(thisrecid): reference_list[thisrecid] = [] if not rec_ids[0] in reference_list[thisrecid]: reference_list[thisrecid].append(rec_ids[0]) mesg = "d_references_s done fully" write_message(mesg) task_update_progress(mesg) t3 = os.times()[4] done = 0 numrecs = len(d_reports_numbers) write_message("Phase 3: d_reports_numbers") #search for stuff like CERN-TH-4859/87 in list of refs for thisrecid, reportcodes in d_reports_numbers.iteritems(): if (done % 1000 == 0): mesg = "d_report_numbers done "+str(done)+" of "+str(numrecs) write_message(mesg) task_update_progress(mesg) done = done+1 for reportcode in reportcodes: if reportcode: rec_ids = [] try: rec_ids = get_recids_matching_query(reportcode, pubrefntag) except: rec_ids = [] if rec_ids: for recid in rec_ids: #normal checks.. if not citation_list.has_key(thisrecid): citation_list[thisrecid] = [] if not reference_list.has_key(recid): reference_list[recid] = [] if not result.has_key(thisrecid): result[thisrecid] = 0 #normal updates if not recid in citation_list[thisrecid]: result[thisrecid] += 1 citation_list[thisrecid].append(recid) if not thisrecid in reference_list[recid]: reference_list[recid].append(thisrecid) mesg = "d_report_numbers done fully" write_message(mesg) task_update_progress(mesg) #find this record's pubinfo in other records' bibliography write_message("Phase 4: d_records_s") done = 0 numrecs = len(d_records_s) t4 = os.times()[4] for thisrecid, recs in d_records_s.iteritems(): if (done % 1000 == 0): mesg = "d_records_s done "+str(done)+" of "+str(numrecs) write_message(mesg) task_update_progress(mesg) done = done+1 p = recs.replace("\"","") #search the publication string like Phys. 
Lett., B 482 (2000) 417 in 999C5s rec_ids = list(search_unit(f=pubreftag, p=p, m='a')) write_message("These records match "+p+" in "+pubreftag+" : "+str(rec_ids), verbose=9) if rec_ids: for rec_id in rec_ids: #normal checks if not result.has_key(thisrecid): result[thisrecid] = 0 if not citation_list.has_key(thisrecid): citation_list[thisrecid] = [] if not reference_list.has_key(rec_id): reference_list[rec_id] = [] if not rec_id in citation_list[thisrecid]: result[thisrecid] += 1 citation_list[thisrecid].append(rec_id) if not thisrecid in reference_list[rec_id]: reference_list[rec_id].append(thisrecid) mesg = "d_records_s done fully" write_message(mesg) task_update_progress(mesg) write_message("Phase 5: reverse lists") #remove empty lists in citation and reference keys = citation_list.keys() for k in keys: if not citation_list[k]: del citation_list[k] keys = reference_list.keys() for k in keys: if not reference_list[k]: del reference_list[k] write_message("Phase 6: self-citations") selfdic = {} #get the initial self citation dict initial_self_dict = get_cit_dict("selfcitdict") selfdic = initial_self_dict #add new records to selfdic acit = task_get_option("author-citations") if not acit: write_message("Self cite processing disabled. Use -A option to enable it.") else: write_message("self cite and author citations enabled") selfdic = get_self_citations(updated_rec_list, citation_list, initial_self_dict, config) #selfdic consists of #key k -> list of values [v1,v2,..] #where k is a record with author A and k cites v1,v2.. and A appears in v1,v2.. #create a reverse "x cited by y" self cit dict selfcitedbydic = {} for k in selfdic.keys(): vlist = selfdic[k] for v in vlist: if selfcitedbydic.has_key(v): tmplist = selfcitedbydic[v] if not k in tmplist: tmplist.append(k) else: tmplist = [k] selfcitedbydic[v] = tmplist write_message("Getting author citations") #get author citations for records in updated_rec_list initial_author_dict = get_initial_author_dict() authorcitdic = initial_author_dict acit = task_get_option("author-citations") if not acit: print "Author cites disabled. Use -A option to enable it." 
else: write_message("author citations enabled") authorcitdic = get_author_citations(updated_rec_list, citation_list, initial_author_dict, config) if task_get_task_param('verbose') >= 3: #print only X first to prevent flood tmpdict = {} tmp = citation_list.keys()[0:10] for t in tmp: tmpdict[t] = citation_list[t] write_message("citation_list (x is cited by y): "+str(tmpdict)) write_message("size: "+str(len(citation_list.keys()))) tmp = reference_list.keys()[0:10] tmpdict = {} for t in tmp: tmpdict[t] = reference_list[t] write_message("reference_list (x cites y): "+str(tmpdict)) write_message("size: "+str(len(reference_list.keys()))) tmp = selfcitedbydic.keys()[0:10] tmpdict = {} for t in tmp: tmpdict[t] = selfcitedbydic[t] mesg = "selfcitedbydic (x is cited by y and one of the authors of x same as y's):" mesg += str(tmpdict) write_message(mesg) write_message("size: "+str(len(selfcitedbydic.keys()))) tmp = selfdic.keys()[0:100] tmpdict = {} for t in tmp: tmpdict[t] = selfdic[t] mesg = "selfdic (x cites y and one of the authors of x same as y's): "+str(tmpdict) write_message(mesg) write_message("size: "+str(len(selfdic.keys()))) tmp = authorcitdic.keys()[0:10] tmpdict = {} for t in tmp: tmpdict[t] = authorcitdic[t] write_message("authorcitdic (author is cited in recs): "+str(tmpdict)) write_message("size: "+str(len(authorcitdic.keys()))) insert_cit_ref_list_intodb(citation_list, reference_list, selfcitedbydic, selfdic, authorcitdic) t5 = os.times()[4] write_message("Execution time for analyzing the citation information generating the dictionary:") write_message("... checking ref number: %.2f sec" % (t2-t1)) write_message("... checking ref ypvt: %.2f sec" % (t3-t2)) write_message("... checking rec number: %.2f sec" % (t4-t3)) write_message("... checking rec ypvt: %.2f sec" % (t5-t4)) write_message("... total time of ref_analyze: %.2f sec" % (t5-t1)) return result def insert_cit_ref_list_intodb(citation_dic, reference_dic, selfcbdic, selfdic, authorcitdic): """Insert the reference and citation list into the database""" insert_into_cit_db(reference_dic,"reversedict") insert_into_cit_db(citation_dic,"citationdict") insert_into_cit_db(selfcbdic,"selfcitedbydict") insert_into_cit_db(selfdic,"selfcitdict") for a in authorcitdic.keys(): lserarr = (serialize_via_marshal(authorcitdic[a])) #author name: replace " with something else a.replace('"', '\'') a = unicode(a, 'utf-8') try: ablob = run_sql("select hitlist from rnkAUTHORDATA where aterm = %s", (a,)) if not (ablob): #print "insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)" , (a,lserarr) run_sql("insert into rnkAUTHORDATA(aterm,hitlist) values (%s,%s)", (a,lserarr)) else: #print "UPDATE rnkAUTHORDATA SET hitlist = %s where aterm=%s""" , (lserarr,a) run_sql("UPDATE rnkAUTHORDATA SET hitlist = %s where aterm=%s", (lserarr,a)) except: register_exception(prefix="could not read/write rnkAUTHORDATA aterm="+a+" hitlist="+str(lserarr), alert_admin=True) def insert_into_cit_db(dic, name): """an aux thing to avoid repeating code""" ndate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) try: s = serialize_via_marshal(dic) write_message("size of "+name+" "+str(len(s))) #check that this column really exists testres = run_sql("select object_name from rnkCITATIONDATA where object_name = %s", (name,)) if testres: run_sql("UPDATE rnkCITATIONDATA SET object_value = %s where object_name = %s", (s, name)) else: #there was no entry for name, let's force.. 
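# Editorial note on the Phase 2 regex above: "(.*)(-\d+$)" trims a trailing
# page range down to its first page before searching the journal index:
#
#   import re
#   m = re.compile(r"(.*)(-\d+$)").findall("Phys. Rev. Lett. 53 (1986) 2285-2288")
#   # m == [('Phys. Rev. Lett. 53 (1986) 2285', '-2288')]  -> search m[0][0]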
run_sql("INSERT INTO rnkCITATIONDATA(object_name,object_value) values (%s,%s)", (name,s)) run_sql("UPDATE rnkCITATIONDATA SET last_updated = %s where object_name = %s", (ndate,name)) except: register_exception(prefix="could not write "+name+" into db", alert_admin=True) def get_cit_dict(name): """get a named citation dict from the db""" cdict = {} try: cdict = run_sql("select object_value from rnkCITATIONDATA where object_name = %s", (name,)) if cdict and cdict[0] and cdict[0][0]: dict_from_db = deserialize_via_marshal(cdict[0][0]) return dict_from_db else: return {} except: register_exception(prefix="could not read "+name+" from db", alert_admin=True) return dict def get_initial_author_dict(): """read author->citedinlist dict from the db""" adict = {} try: ah = run_sql("select aterm,hitlist from rnkAUTHORDATA") for (a, h) in ah: adict[a] = deserialize_via_marshal(h) return adict except: register_exception(prefix="could not read rnkAUTHORDATA", alert_admin=True) return {} def insert_into_missing(recid, report): """put the referingrecordnum-publicationstring into the "we are missing these" table""" report.replace('"','\'') try: srecid = str(recid) wasalready = run_sql("select id_bibrec from rnkCITATIONDATAEXT where id_bibrec = %s and extcitepubinfo = %s", (srecid,report)) if not wasalready: run_sql("insert into rnkCITATIONDATAEXT(id_bibrec, extcitepubinfo) values (%s,%s)", (srecid, report)) except: #we should complain but it can result to million lines of warnings so just pass.. pass def remove_from_missing(report): """remove the recid-ref -pairs from the "missing" table for report x: prob in the case ref got in our library collection""" report.replace('"','\'') try: run_sql("delete from rnkCITATIONDATAEXT where extcitepubinfo= %s", (report,)) except: #we should complain but it can result to million lines of warnings so just pass.. pass def create_analysis_tables(): """temporary simple table + index""" sql1 = "CREATE TABLE IF NOT EXISTS tmpcit (citer mediumint(10), cited mediumint(10)) TYPE=MyISAM" sql2 = "CREATE UNIQUE INDEX citercited on tmpcit(citer, cited)" sql3 = "CREATE INDEX citer on tmpcit(citer)" sql4 = "CREATE INDEX cited on tmpcit(cited)" try: run_sql(sql1) run_sql(sql2) run_sql(sql3) run_sql(sql4) except: pass def write_citer_cited(citer, cited): """write an entry to tmp table""" sciter = str(citer) scited = str(cited) try: run_sql("insert into tmpcit(citer, cited) values (%s,%s)", (sciter, scited)) except: pass def print_missing(num): """ Print the contents of rnkCITATIONDATAEXT table containing external records that were cited by NUM or more internal records. NUM is by default taken from the -E command line option. 
""" if not num: num = task_get_option("print-extcites") write_message("Listing external papers cited by %i or more internal records:" % num) res = run_sql("SELECT COUNT(id_bibrec), extcitepubinfo FROM rnkCITATIONDATAEXT \ GROUP BY extcitepubinfo HAVING COUNT(id_bibrec) >= %s \ ORDER BY COUNT(id_bibrec) DESC", (num,)) for (cnt, brec) in res: print str(cnt)+"\t"+brec write_message("Listing done.") def tagify(parsedtag): """aux auf to make '100__a' out of ['100','','','a']""" tag = "" for t in parsedtag: if t == '': t = '_' tag = tag+t return tag diff --git a/modules/bibrank/lib/bibrank_downloads_similarity.py b/modules/bibrank/lib/bibrank_downloads_similarity.py index 542c09ff9..8518aea3a 100644 --- a/modules/bibrank/lib/bibrank_downloads_similarity.py +++ b/modules/bibrank/lib/bibrank_downloads_similarity.py @@ -1,119 +1,99 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = \ "$Id$" from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_CERN_SITE from invenio.dbquery import run_sql from invenio.bibrank_downloads_indexer import database_tuples_to_single_list - -def get_fieldvalues(recID, tag): - """Return list of field values for field TAG inside record RECID. - Copy from search_engine""" - out = [] - if tag == "001___": - # we have asked for recID that is not stored in bibXXx tables - out.append(str(recID)) - else: - # we are going to look inside bibXXx tables - digit = tag[0:2] - bx = "bib%sx" % digit - bibx = "bibrec_bib%sx" % digit - query = "SELECT bx.value FROM %s AS bx, %s AS bibx " \ - " WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND " \ - " bx.tag LIKE '%s' ORDER BY bibx.field_number, bx.tag ASC" % \ - (bx, bibx, recID, tag) - res = run_sql(query) - for row in res: - out.append(row[0]) - return out +from invenio.search_engine_utils import get_fieldvalues def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted. Copy from search_engine""" out = 0 query = "SELECT id FROM bibrec WHERE id='%s'" % recID res = run_sql(query, None, 1) if res: # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out ### INTERFACE def register_page_view_event(recid, uid, client_ip_address): """Register Detailed record page view event for record RECID consulted by user UID from machine CLIENT_HOST_IP. To be called by the search engine. 
""" if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: # do not register access if we are in read-only access control # site mode: return [] return run_sql("INSERT INTO rnkPAGEVIEWS " \ " (id_bibrec,id_user,client_host,view_time) " \ " VALUES (%s,%s,INET_ATON(%s),NOW())", \ (recid, uid, client_ip_address)) def calculate_reading_similarity_list(recid, type="pageviews"): """Calculate reading similarity data to use in reading similarity boxes (``people who downloaded/viewed this file/page have also downloaded/viewed''). Return list of (recid1, score1), (recid2,score2), ... for all recidN that were consulted by the same people who have also consulted RECID. The reading similarity TYPE can be either `pageviews' or `downloads', depending whether we want to obtain page view similarity or download similarity. """ if CFG_CERN_SITE: return [] # CERN hack 2009-11-23 to ease the load if type == "downloads": tablename = "rnkDOWNLOADS" else: # default tablename = "rnkPAGEVIEWS" # firstly compute the set of client hosts who consulted recid: client_host_list = run_sql("SELECT DISTINCT(client_host)" + \ " FROM " + tablename + \ " WHERE id_bibrec=%s " + \ " AND client_host IS NOT NULL", (recid,)) # secondly look up all recids that were consulted by these client hosts, # and order them by the number of different client hosts reading them: res = [] if client_host_list != (): client_host_list = str(database_tuples_to_single_list(client_host_list)) client_host_list = client_host_list.replace("L", "") client_host_list = client_host_list.replace("[", "") client_host_list = client_host_list.replace("]", "") res = run_sql("SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c" \ " FROM " + tablename + \ " WHERE client_host IN (" + client_host_list + ")" + \ " AND id_bibrec != %s" \ " GROUP BY id_bibrec ORDER BY c DESC LIMIT 10", (recid,)) return res diff --git a/modules/bibsword/lib/bibsword_webinterface.py b/modules/bibsword/lib/bibsword_webinterface.py index a690bd81f..c42863229 100644 --- a/modules/bibsword/lib/bibsword_webinterface.py +++ b/modules/bibsword/lib/bibsword_webinterface.py @@ -1,513 +1,513 @@ ''' Forward to ArXiv.org source code ''' ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
diff --git a/modules/bibsword/lib/bibsword_webinterface.py b/modules/bibsword/lib/bibsword_webinterface.py
index a690bd81f..c42863229 100644
--- a/modules/bibsword/lib/bibsword_webinterface.py
+++ b/modules/bibsword/lib/bibsword_webinterface.py
@@ -1,513 +1,513 @@
'''
Forward to ArXiv.org source code
'''
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

__revision__ = "$Id$"
__lastupdated__ = """$Date$"""

import os
from invenio.access_control_engine import acc_authorize_action
from invenio.config import CFG_SITE_URL, CFG_TMPDIR
from invenio.webuser import page_not_authorized, collect_user_info
from invenio.bibsword_client import perform_display_sub_status, \
                                    perform_display_server_list, \
                                    perform_display_collection_list, \
                                    perform_display_category_list, \
                                    perform_display_metadata, \
                                    perform_submit_record, \
                                    perform_display_server_infos, \
                                    list_remote_servers
from invenio.webpage import page
from invenio.messages import gettext_set_language
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.websubmit_functions.Get_Recid import \
    get_existing_records_for_reportnumber
-from invenio.search_engine import get_fieldvalues
+from invenio.search_engine_utils import get_fieldvalues
from invenio.bibsword_config import CFG_MARC_REPORT_NUMBER, CFG_MARC_ADDITIONAL_REPORT_NUMBER

class WebInterfaceSword(WebInterfaceDirectory):
    """ Handle /bibsword set of pages."""
    _exports = ['', 'remoteserverinfos']

    def __init__(self, reqid=None):
        '''Initialize'''
        self.reqid = reqid

    def __call__(self, req, form):
        errors = []
        warnings = []
        body = ''
        error_messages = []

        #***********************************************************************
        #  Get values from the form
        #***********************************************************************
        argd = wash_urlargd(form, {
            'ln' : (str, ''),

            # information on the state of the form submission
            'status' : (str, ''),
            'submit' : (str, ''),
            'last_row' : (str, ''),
            'first_row' : (str, ''),
            'offset' : (int, ''),
            'total_rows' : (str, ''),

            # mandatory information
            'id_record' : (str, ''),
            'recid' : (int, 0),
            'id_remote_server' : (str, ''),
            'id_collection' : (str, ''),
            'id_primary' : (str, ''),
            'id_categories' : (list, []),
            'id' : (str, ''),
            'title' : (str, ''),
            'summary' : (str, ''),
            'author_name' : (str, ''),
            'author_email' : (str, ''),
            'contributor_name' : (list, []),
            'contributor_email' : (list, []),
            'contributor_affiliation' : (list, []),

            # optional information
            'comment' : (str, ''),
            'doi' : (str, ''),
            'type' : (str, ''),
            'journal_refs' : (list, []),
            'report_nos' : (list, []),
            'media' : (list, []),
            'new_media' : (str, ''),
            'filename' : (str, '')
        })

        # set language for i18n text auto generation
        _ = gettext_set_language(argd['ln'])

        # authentication
        (auth_code, auth_message) = self.check_credential(req)
        if auth_code != 0:
            return page_not_authorized(req=req, referer='/bibsword',
                                       text=auth_message, navtrail='')

        user_info = collect_user_info(req)

        # Build contributor tuples {name, email and affiliation(s)}
        contributors = []
        contributor_id = 0
        affiliation_id = 0
        for name in argd['contributor_name']:
            contributor = {}
            contributor['name'] = name
            contributor['email'] = argd['contributor_email'][contributor_id]
            contributor['affiliation'] = []
            is_last_affiliation = False
            while is_last_affiliation == False and \
                    affiliation_id < len(argd['contributor_affiliation']):
                if argd['contributor_affiliation'][affiliation_id] == 'next':
                    is_last_affiliation = True
                elif argd['contributor_affiliation'][affiliation_id] != '':
                    contributor['affiliation'].append(\
                        argd['contributor_affiliation'][affiliation_id])
                affiliation_id += 1
            contributors.append(contributor)
            contributor_id += 1
        argd['contributors'] = contributors

        # get the uploaded file(s) (if there is one)
        for key, formfields in form.items():
            if key == "new_media" and hasattr(formfields, "filename") and formfields.filename:
                filename = formfields.filename
                fp = open(os.path.join(CFG_TMPDIR, filename), "w")
                fp.write(formfields.file.read())
                fp.close()
                argd['media'].append(os.path.join(CFG_TMPDIR, filename))
                argd['filename'] = os.path.join(CFG_TMPDIR, filename)

        # Prepare navtrail
        navtrail = '''<a class="navtrail" href="%(CFG_SITE_URL)s/help/admin">Admin Area</a>''' \
                   % {'CFG_SITE_URL': CFG_SITE_URL}

        title = _("BibSword Admin Interface")

        #***********************************************************************
        #  Display admin main page
        #***********************************************************************
        if argd['status'] == '' and argd['recid'] != '' and argd['id_remote_server'] != '':
            remote_servers = list_remote_servers(argd['id_remote_server'])
            if len(remote_servers) == 0:
                error_messages.append("No corresponding remote server could be found")
                (body, errors, warnings) = perform_display_server_list(
                    error_messages, argd['id_record'])
            else:
                title = _("Export with BibSword: Step 2/4")
                navtrail += ''' &gt; <a class="navtrail" href="%(CFG_SITE_URL)s/bibsword">SWORD Interface</a>''' % \
                            {'CFG_SITE_URL' : CFG_SITE_URL}
                (body, errors, warnings) = perform_display_collection_list(
                    argd['id_remote_server'], argd['id_record'],
                    argd['recid'], error_messages)

        elif argd['status'] == '' or argd['submit'] == "Cancel":
            (body, errors, warnings) = perform_display_sub_status()

        elif argd['status'] == 'display_submission':
            if argd['submit'] == 'Refresh all':
                (body, errors, warnings) = \
                    perform_display_sub_status(1, argd['offset'], "refresh_all")
            elif argd['submit'] == 'Select':
                first_row = 1
                (body, errors, warnings) = \
                    perform_display_sub_status(first_row, argd['offset'])
            elif argd['submit'] == 'Next':
                first_row = int(argd['last_row']) + 1
                (body, errors, warnings) = \
                    perform_display_sub_status(first_row, argd['offset'])
            elif argd['submit'] == 'Prev':
                first_row = int(argd['first_row']) - int(argd['offset'])
                (body, errors, warnings) = \
                    perform_display_sub_status(first_row, argd['offset'])
            elif argd['submit'] == 'First':
                (body, errors, warnings) = \
                    perform_display_sub_status(1, argd['offset'])
            elif argd['submit'] == 'Last':
                first_row = int(argd['total_rows']) - int(argd['offset']) + 1
                (body, errors, warnings) = \
                    perform_display_sub_status(first_row, argd['offset'])

        #***********************************************************************
        #  Select remote server
        #***********************************************************************
        # when the user has validated the metadata, display the server list
        elif argd['submit'] == 'New submission':
            title = _("Export with BibSword: Step 1/4")
            navtrail += ''' &gt; <a class="navtrail" href="%(CFG_SITE_URL)s/bibsword">SWORD Interface</a>''' % \
                        {'CFG_SITE_URL' : CFG_SITE_URL}
            (body, errors, warnings) = \
                perform_display_server_list(error_messages)

        # check if the user has selected a remote server
        elif argd['status'] == 'select_server':
            title = _("Export with BibSword: Step 1/4")
            navtrail += ''' &gt; <a class="navtrail" href="%(CFG_SITE_URL)s/bibsword">SWORD Interface</a>''' % \
                        {'CFG_SITE_URL' : CFG_SITE_URL}

            # check if the given id_record exists and convert it into a recid
            if argd['recid'] != 0:
                report_numbers = get_fieldvalues(argd['recid'], CFG_MARC_REPORT_NUMBER)
                report_numbers.extend(get_fieldvalues(argd['recid'], CFG_MARC_ADDITIONAL_REPORT_NUMBER))
                if report_numbers:
                    argd['id_record'] = report_numbers[0]
            elif argd['id_record'] == '':
                error_messages.append("You must specify a report number")
            else:
                recids = \
                    get_existing_records_for_reportnumber(argd['id_record'])
                if len(recids) == 0:
                    error_messages.append(\
                        "No document found with the given report number")
                elif len(recids) > 1:
                    error_messages.append(\
                        "Several documents have been found with the given report number")
                else:
                    argd['recid'] = recids[0]

            if argd['id_remote_server'] in ['0', '']:
error_messages.append("No remote server was selected") if not argd['id_remote_server'] in ['0', '']: # get the server's name and host remote_servers = list_remote_servers(argd['id_remote_server']) if len(remote_servers) == 0: error_messages.append("No corresponding remote server could be found") argd['id_remote_server'] = '0' if argd['id_remote_server'] in ['0', ''] or argd['recid'] == 0: (body, errors, warnings) = perform_display_server_list( error_messages, argd['id_record']) else: title = _("Export with BibSword: Step 2/4") (body, errors, warnings) = perform_display_collection_list( argd['id_remote_server'], argd['id_record'], argd['recid'], error_messages) #*********************************************************************** # Select collection #*********************************************************************** # check if the user wants to change the remote server elif argd['submit'] == 'Modify server': title = _("Export with BibSword: Step 1/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} (body, errors, warnings) = \ perform_display_server_list(error_messages, argd['id_record']) # check if the user has selected a collection elif argd['status'] == 'select_collection': title = _("Export with BibSword: Step 2/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL': CFG_SITE_URL} if argd['id_collection'] == '0': error_messages.append("No collection was selected") (body, errors, warnings) = perform_display_collection_list( argd['id_remote_server'], argd['id_record'], argd['recid'], error_messages) else: title = _("Export with BibSword: Step 3/4") (body, errors, warnings) = perform_display_category_list( argd['id_remote_server'], argd['id_collection'], argd['id_record'], argd['recid'], error_messages) #*********************************************************************** # Select primary #*********************************************************************** # check if the user wants to change the collection elif argd['submit'] == 'Modify collection': title = _("Export with BibSword: Step 2/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL': CFG_SITE_URL} (body, errors, warnings) = perform_display_collection_list( argd['id_remote_server'], argd['id_record'], argd['recid'], error_messages) # check if the user has selected a primary category elif argd['status'] == 'select_primary_category': title = _("Export with BibSword: Step 3/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} if argd['id_primary'] == '0': error_messages.append("No primary category selected") (body, errors, warnings) = perform_display_category_list( argd['id_remote_server'], argd['id_collection'], argd['id_record'], argd['recid'], error_messages) else: title = _("Export with BibSword: Step 4/4") (body, errors, warnings) = perform_display_metadata(user_info, str(argd['id_remote_server']), str(argd['id_collection']), str(argd['id_primary']), argd['id_categories'], argd['id_record'], argd['recid'], error_messages) #*********************************************************************** # Check record media and metadata #*********************************************************************** # check if the user wants to change the collection elif argd['submit'] == 'Modify destination': title = _("Export with BibSword: Step 3/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} (body, errors, warnings) = perform_display_category_list( argd['id_remote_server'], 
argd['id_collection'], argd['id_record'], argd['recid'], error_messages) # check if the metadata are complet and well-formed elif argd['status'] == 'check_submission': title = _("Export with BibSword: Step 4/4") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} if argd['submit'] == "Upload": error_messages.append("Media loaded") if argd['id'] == '': error_messages.append("Id is missing") if argd['title'] == '': error_messages.append("Title is missing") if argd['summary'] == '': error_messages.append("summary is missing") elif len(argd['summary']) < 25: error_messages.append("summary must have at least 25 character") if argd['author_name'] == '': error_messages.append("No submitter name specified") if argd['author_email'] == '': error_messages.append("No submitter email specified") if len(argd['contributors']) == 0: error_messages.append("No author specified") if len(error_messages) > 0: (body, errors, warnings) = perform_display_metadata(user_info, str(argd['id_remote_server']), str(argd['id_collection']), str(argd['id_primary']), argd['id_categories'], argd['id_record'], argd['recid'], error_messages, argd) else: title = _("Export with BibSword: Acknowledgement") navtrail += ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} (body, errors, warnings) = perform_submit_record(user_info, str(argd['id_remote_server']), str(argd['id_collection']), str(argd['id_primary']), argd['id_categories'], argd['recid'], argd) # return of all the updated informations to be display return page(title = title, body = body, navtrail = navtrail, #uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], #errors = errors, warnings = warnings, navmenuid = "yourmessages") def remoteserverinfos(self, req, form): ''' This method handle the /bibsword/remoteserverinfos call ''' argd = wash_urlargd(form, { 'ln' : (str, ''), 'id' : (str, '') }) #authentication (auth_code, auth_message) = self.check_credential(req) if auth_code != 0: return page_not_authorized(req=req, referer='/bibsword', text=auth_message, navtrail='') body = perform_display_server_infos(argd['id']) navtrail = ''' > ''' \ '''SWORD Interface''' % \ {'CFG_SITE_URL' : CFG_SITE_URL} # return of all the updated informations to be display return page(title = 'Remote server infos', body = body, navtrail = navtrail, #uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], errors = '', warnings = '', navmenuid = "yourmessages") def check_credential(self, req): ''' This method check if the user has the right to get into this function ''' auth_code, auth_message = acc_authorize_action(req, 'runbibswordclient') return (auth_code, auth_message) index = __call__ diff --git a/modules/miscutil/demo/democfgdata.sql b/modules/miscutil/demo/democfgdata.sql index 8757fe40c..8e509f8b2 100644 --- a/modules/miscutil/demo/democfgdata.sql +++ b/modules/miscutil/demo/democfgdata.sql @@ -1,2438 +1,2438 @@ -- This file is part of Invenio. -- Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. -- -- Invenio is free software; you can redistribute it and/or -- modify it under the terms of the GNU General Public License as -- published by the Free Software Foundation; either version 2 of the -- License, or (at your option) any later version. -- -- Invenio is distributed in the hope that it will be useful, but -- WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- General Public License for more details. 
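For reference, the contributor loop in WebInterfaceSword.__call__ above decodes a single flattened contributor_affiliation list in which the literal string 'next' terminates each contributor's group of affiliations. A self-contained sketch of that decoding (the helper name and sample values are hypothetical, not part of the patch):

def split_affiliations(flat):
    """Split a flat affiliation list on the 'next' sentinel,
    dropping empty entries, as the handler above does."""
    groups, current = [], []
    for item in flat:
        if item == 'next':   # sentinel: close the current contributor's group
            groups.append(current)
            current = []
        elif item != '':     # ignore blank form inputs
            current.append(item)
    return groups

# hypothetical form values for two contributors:
print split_affiliations(['CERN', 'next', 'Atlantis Institute', '', 'next'])
# -> [['CERN'], ['Atlantis Institute']]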
diff --git a/modules/miscutil/demo/democfgdata.sql b/modules/miscutil/demo/democfgdata.sql
index 8757fe40c..8e509f8b2 100644
--- a/modules/miscutil/demo/democfgdata.sql
+++ b/modules/miscutil/demo/democfgdata.sql
@@ -1,2438 +1,2438 @@
-- This file is part of Invenio.
-- Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
--
-- Invenio is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License as
-- published by the Free Software Foundation; either version 2 of the
-- License, or (at your option) any later version.
--
-- Invenio is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Invenio; if not, write to the Free Software Foundation, Inc.,
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

INSERT INTO user VALUES (2,'jekyll@cds.cern.ch',AES_ENCRYPT(email,'j123ekyll'),'1',NULL,'jekyll','');
INSERT INTO user VALUES (3,'hyde@cds.cern.ch',AES_ENCRYPT(email,'h123yde'),'1',NULL,'hyde','');
INSERT INTO user VALUES (4,'dorian.gray@cds.cern.ch',AES_ENCRYPT(email,'d123orian'),'1',NULL,'dorian','');
INSERT INTO user VALUES (5,'romeo.montague@cds.cern.ch',AES_ENCRYPT(email,'r123omeo'),'1','NULL','romeo','');
INSERT INTO user VALUES (6,'juliet.capulet@cds.cern.ch',AES_ENCRYPT(email,'j123uliet'),'1','NULL','juliet','');
INSERT INTO user VALUES (7,'benvolio.montague@cds.cern.ch',AES_ENCRYPT(email,'b123envolio'),'1','NULL','benvolio','');
INSERT INTO user VALUES (8,'balthasar.montague@cds.cern.ch',AES_ENCRYPT(email,'b123althasar'),'1','NULL','balthasar','');

INSERT INTO usergroup VALUES (1,'Theses viewers','Theses viewers internal group','VO','INTERNAL');
INSERT INTO usergroup VALUES (2,'montague-family','The Montague family.','VM','INTERNAL');

INSERT INTO user_usergroup VALUES (2,1,'M',NOW());
INSERT INTO user_usergroup VALUES (5,2,'A',NOW());
INSERT INTO user_usergroup VALUES (6,2,'M',NOW());
INSERT INTO user_usergroup VALUES (7,2,'M',NOW());

INSERT INTO collection VALUES (2,'Preprints','collection:PREPRINT',NULL,NULL);
INSERT INTO collection VALUES (3,'Books','collection:BOOK',NULL,NULL);
INSERT INTO collection VALUES (4,'Theses','collection:THESIS',NULL,NULL);
INSERT INTO collection VALUES (5,'Reports','collection:REPORT',NULL,NULL);
INSERT INTO collection VALUES (6,'Articles','collection:ARTICLE',NULL,NULL);
INSERT INTO collection VALUES (8,'Pictures','collection:PICTURE',NULL,NULL);
INSERT INTO collection VALUES (9,'CERN Divisions',NULL,NULL,NULL);
INSERT INTO collection VALUES (10,'CERN Experiments',NULL,NULL,NULL);
INSERT INTO collection VALUES (11,'Theoretical Physics (TH)','division:TH',NULL,NULL);
INSERT INTO collection VALUES (12,'Experimental Physics (EP)','division:EP',NULL,NULL);
INSERT INTO collection VALUES (13,'ISOLDE','experiment:ISOLDE',NULL,NULL);
INSERT INTO collection VALUES (14,'ALEPH','experiment:ALEPH',NULL,NULL);
INSERT INTO collection VALUES (15,'Articles & Preprints',NULL,NULL,NULL);
INSERT INTO collection VALUES (16,'Books & Reports',NULL,NULL,NULL);
INSERT INTO collection VALUES (17,'Multimedia & Arts',NULL,NULL,NULL);
INSERT INTO collection VALUES (18,'Poetry','collection:POETRY',NULL,NULL);
INSERT INTO collection VALUES (19,'Atlantis Times News','collection:ATLANTISTIMESNEWS',NULL,NULL);
INSERT INTO collection VALUES (20,'Atlantis Times Arts','collection:ATLANTISTIMESARTS',NULL,NULL);
INSERT INTO collection VALUES (21,'Atlantis Times Science','collection:ATLANTISTIMESSCIENCE',NULL,NULL);
INSERT INTO collection VALUES (22,'Atlantis Times',NULL,NULL,NULL);
INSERT INTO collection VALUES (23,'Atlantis Institute Books','hostedcollection:',NULL,NULL);
INSERT INTO collection VALUES (24,'Atlantis Institute Articles','hostedcollection:',NULL,NULL);
INSERT INTO collection VALUES (25,'Atlantis Times Drafts','collection:ATLANTISTIMESSCIENCEDRAFT or collection:ATLANTISTIMESARTSDRAFT or collection:ATLANTISTIMESNEWSDRAFT',NULL,NULL);

INSERT INTO clsMETHOD VALUES (1,'HEP','http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf','High Energy Physics Taxonomy','0000-00-00 00:00:00');
INSERT INTO clsMETHOD VALUES
(2,'NASA-subjects','http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf','NASA Subjects','0000-00-00 00:00:00'); INSERT INTO collection_clsMETHOD VALUES (2,1); INSERT INTO collection_clsMETHOD VALUES (12,2); INSERT INTO collectionname VALUES (2,'en','ln','Preprints'); INSERT INTO collectionname VALUES (2,'fr','ln','Prétirages'); INSERT INTO collectionname VALUES (2,'de','ln','Preprints'); INSERT INTO collectionname VALUES (2,'es','ln','Preprints'); INSERT INTO collectionname VALUES (2,'ca','ln','Preprints'); INSERT INTO collectionname VALUES (2,'pl','ln','Preprinty'); INSERT INTO collectionname VALUES (2,'pt','ln','Preprints'); INSERT INTO collectionname VALUES (2,'it','ln','Preprint'); INSERT INTO collectionname VALUES (2,'ru','ln','Препринты'); INSERT INTO collectionname VALUES (2,'sk','ln','Preprinty'); INSERT INTO collectionname VALUES (2,'cs','ln','Preprinty'); INSERT INTO collectionname VALUES (2,'no','ln','Førtrykk'); INSERT INTO collectionname VALUES (2,'sv','ln','Preprints'); INSERT INTO collectionname VALUES (2,'el','ln','Προδημοσιεύσεις'); INSERT INTO collectionname VALUES (2,'uk','ln','Препринти'); INSERT INTO collectionname VALUES (2,'ja','ln','プレプリント'); INSERT INTO collectionname VALUES (2,'bg','ln','Препринти'); INSERT INTO collectionname VALUES (2,'hr','ln','Preprinti'); INSERT INTO collectionname VALUES (2,'zh_CN','ln','预印'); INSERT INTO collectionname VALUES (2,'zh_TW','ln','預印'); INSERT INTO collectionname VALUES (2,'hu','ln','Preprintek'); INSERT INTO collectionname VALUES (2,'af','ln','Pre-drukke'); INSERT INTO collectionname VALUES (2,'gl','ln','Preprints'); INSERT INTO collectionname VALUES (2,'ro','ln','Preprinturi'); INSERT INTO collectionname VALUES (2,'rw','ln','Preprints'); INSERT INTO collectionname VALUES (2,'ka','ln','პრეპრინტები'); INSERT INTO collectionname VALUES (2,'lt','ln','Rankraščiai'); INSERT INTO collectionname VALUES (2,'ar','ln','مسودات'); INSERT INTO collectionname VALUES (3,'en','ln','Books'); INSERT INTO collectionname VALUES (3,'fr','ln','Livres'); INSERT INTO collectionname VALUES (3,'de','ln','Bücher'); INSERT INTO collectionname VALUES (3,'es','ln','Libros'); INSERT INTO collectionname VALUES (3,'ca','ln','Llibres'); INSERT INTO collectionname VALUES (3,'pl','ln','Książki'); INSERT INTO collectionname VALUES (3,'pt','ln','Livros'); INSERT INTO collectionname VALUES (3,'it','ln','Libri'); INSERT INTO collectionname VALUES (3,'ru','ln','Книги'); INSERT INTO collectionname VALUES (3,'sk','ln','Knihy'); INSERT INTO collectionname VALUES (3,'cs','ln','Knihy'); INSERT INTO collectionname VALUES (3,'no','ln','Bøker'); INSERT INTO collectionname VALUES (3,'sv','ln',''); INSERT INTO collectionname VALUES (3,'el','ln','Βιβλία'); INSERT INTO collectionname VALUES (3,'uk','ln','Книги'); INSERT INTO collectionname VALUES (3,'ja','ln','本'); INSERT INTO collectionname VALUES (3,'bg','ln','Книги'); INSERT INTO collectionname VALUES (3,'hr','ln','Knjige'); INSERT INTO collectionname VALUES (3,'zh_CN','ln','书本'); INSERT INTO collectionname VALUES (3,'zh_TW','ln','書本'); INSERT INTO collectionname VALUES (3,'hu','ln','Könyvek'); INSERT INTO collectionname VALUES (3,'af','ln','Boeke'); INSERT INTO collectionname VALUES (3,'gl','ln','Libros'); INSERT INTO collectionname VALUES (3,'ro','ln','Cărţi'); INSERT INTO collectionname VALUES (3,'rw','ln','Ibitabo'); INSERT INTO collectionname VALUES (3,'ka','ln','წიგნები'); INSERT INTO collectionname VALUES (3,'lt','ln','Knygos'); INSERT INTO collectionname VALUES (3,'ar','ln','كتب'); INSERT 
INTO collectionname VALUES (4,'en','ln','Theses'); INSERT INTO collectionname VALUES (4,'fr','ln','Thèses'); INSERT INTO collectionname VALUES (4,'de','ln','Dissertationen'); INSERT INTO collectionname VALUES (4,'es','ln','Tesis'); INSERT INTO collectionname VALUES (4,'ca','ln','Tesis'); INSERT INTO collectionname VALUES (4,'pl','ln','Prace naukowe'); INSERT INTO collectionname VALUES (4,'pt','ln','Teses'); INSERT INTO collectionname VALUES (4,'it','ln','Tesi'); INSERT INTO collectionname VALUES (4,'ru','ln','Диссертации'); INSERT INTO collectionname VALUES (4,'sk','ln','Dizertácie'); INSERT INTO collectionname VALUES (4,'cs','ln','Disertace'); INSERT INTO collectionname VALUES (4,'no','ln','Avhandlinger'); INSERT INTO collectionname VALUES (4,'sv','ln',''); INSERT INTO collectionname VALUES (4,'el','ln','Διατριβές'); INSERT INTO collectionname VALUES (4,'uk','ln','Дисертації'); INSERT INTO collectionname VALUES (4,'ja','ln','説'); INSERT INTO collectionname VALUES (4,'bg','ln','Дисертации'); INSERT INTO collectionname VALUES (4,'hr','ln','Disertacije'); INSERT INTO collectionname VALUES (4,'zh_CN','ln','论文'); INSERT INTO collectionname VALUES (4,'zh_TW','ln','論文'); INSERT INTO collectionname VALUES (4,'hu','ln','Disszertációk'); INSERT INTO collectionname VALUES (4,'af','ln','Tesise'); INSERT INTO collectionname VALUES (4,'gl','ln','Teses'); INSERT INTO collectionname VALUES (4,'ro','ln','Teze'); INSERT INTO collectionname VALUES (4,'rw','ln','Igitabo ky\'ubushakashatsi'); -- ' INSERT INTO collectionname VALUES (4,'ka','ln','თეზისები'); INSERT INTO collectionname VALUES (4,'lt','ln','Disertacijos'); INSERT INTO collectionname VALUES (4,'ar','ln','أطروحات'); INSERT INTO collectionname VALUES (5,'en','ln','Reports'); INSERT INTO collectionname VALUES (5,'fr','ln','Rapports'); INSERT INTO collectionname VALUES (5,'de','ln','Reports'); INSERT INTO collectionname VALUES (5,'es','ln','Informes'); INSERT INTO collectionname VALUES (5,'ca','ln','Informes'); INSERT INTO collectionname VALUES (5,'pl','ln','Raporty'); INSERT INTO collectionname VALUES (5,'pt','ln','Relatórios'); INSERT INTO collectionname VALUES (5,'it','ln','Rapporti'); INSERT INTO collectionname VALUES (5,'ru','ln','Рапорты'); INSERT INTO collectionname VALUES (5,'sk','ln','Správy'); INSERT INTO collectionname VALUES (5,'cs','ln','Zprávy'); INSERT INTO collectionname VALUES (5,'no','ln','Rapporter'); INSERT INTO collectionname VALUES (5,'sv','ln',''); INSERT INTO collectionname VALUES (5,'el','ln','Αναφορές'); INSERT INTO collectionname VALUES (5,'uk','ln','Звіти'); INSERT INTO collectionname VALUES (5,'ja','ln','レポート'); INSERT INTO collectionname VALUES (5,'bg','ln','Доклади'); INSERT INTO collectionname VALUES (5,'hr','ln','Izvještaji'); INSERT INTO collectionname VALUES (5,'zh_CN','ln','报告'); INSERT INTO collectionname VALUES (5,'zh_TW','ln','報告'); INSERT INTO collectionname VALUES (5,'hu','ln','Tanulmányok'); INSERT INTO collectionname VALUES (5,'af','ln','Verslae'); INSERT INTO collectionname VALUES (5,'gl','ln','Informes'); INSERT INTO collectionname VALUES (5,'ro','ln','Rapoarte'); INSERT INTO collectionname VALUES (5,'rw','ln','Raporo'); INSERT INTO collectionname VALUES (5,'ka','ln','რეპორტები'); INSERT INTO collectionname VALUES (5,'lt','ln','Pranešimai'); INSERT INTO collectionname VALUES (5,'ar','ln','تقارير'); INSERT INTO collectionname VALUES (6,'en','ln','Articles'); INSERT INTO collectionname VALUES (6,'fr','ln','Articles'); INSERT INTO collectionname VALUES (6,'de','ln','Artikel'); INSERT INTO collectionname VALUES 
(6,'es','ln','Articulos'); INSERT INTO collectionname VALUES (6,'ca','ln','Articles'); INSERT INTO collectionname VALUES (6,'pl','ln','Artykuły'); INSERT INTO collectionname VALUES (6,'pt','ln','Artigos'); INSERT INTO collectionname VALUES (6,'it','ln','Articoli'); INSERT INTO collectionname VALUES (6,'ru','ln','Статьи'); INSERT INTO collectionname VALUES (6,'sk','ln','Články'); INSERT INTO collectionname VALUES (6,'cs','ln','Články'); INSERT INTO collectionname VALUES (6,'no','ln','Artikler'); INSERT INTO collectionname VALUES (6,'sv','ln',''); INSERT INTO collectionname VALUES (6,'el','ln',"Άρθρα"); INSERT INTO collectionname VALUES (6,'uk','ln','Статті'); INSERT INTO collectionname VALUES (6,'ja','ln','記事'); INSERT INTO collectionname VALUES (6,'bg','ln','Статии'); INSERT INTO collectionname VALUES (6,'hr','ln','Članci'); INSERT INTO collectionname VALUES (6,'zh_CN','ln','文章'); INSERT INTO collectionname VALUES (6,'zh_TW','ln','文章'); INSERT INTO collectionname VALUES (6,'hu','ln','Cikkek'); INSERT INTO collectionname VALUES (6,'af','ln','Artikels'); INSERT INTO collectionname VALUES (6,'gl','ln','Artigos'); INSERT INTO collectionname VALUES (6,'ro','ln','Articole'); INSERT INTO collectionname VALUES (6,'rw','ln','Ikinyamakuru ky\'ubushakashatsi'); -- ' INSERT INTO collectionname VALUES (6,'ka','ln','სტატიები'); INSERT INTO collectionname VALUES (6,'lt','ln','Straipsniai'); INSERT INTO collectionname VALUES (6,'ar','ln','مقالات'); INSERT INTO collectionname VALUES (8,'en','ln','Pictures'); INSERT INTO collectionname VALUES (8,'fr','ln','Photos'); INSERT INTO collectionname VALUES (8,'de','ln','Fotos'); INSERT INTO collectionname VALUES (8,'es','ln','Imagenes'); INSERT INTO collectionname VALUES (8,'ca','ln','Imatges'); INSERT INTO collectionname VALUES (8,'pl','ln','Obrazy'); INSERT INTO collectionname VALUES (8,'pt','ln','Fotografias'); INSERT INTO collectionname VALUES (8,'it','ln','Foto'); INSERT INTO collectionname VALUES (8,'ru','ln','Фотографии'); INSERT INTO collectionname VALUES (8,'sk','ln','Fotografie'); INSERT INTO collectionname VALUES (8,'cs','ln','Fotografie'); INSERT INTO collectionname VALUES (8,'no','ln','Fotografier'); INSERT INTO collectionname VALUES (8,'sv','ln',''); INSERT INTO collectionname VALUES (8,'el','ln','Εικόνες'); INSERT INTO collectionname VALUES (8,'uk','ln','Зображення'); INSERT INTO collectionname VALUES (8,'ja','ln','映像'); INSERT INTO collectionname VALUES (8,'bg','ln','Снимки'); INSERT INTO collectionname VALUES (8,'hr','ln','Slike'); INSERT INTO collectionname VALUES (8,'zh_CN','ln','图片'); INSERT INTO collectionname VALUES (8,'zh_TW','ln','圖片'); INSERT INTO collectionname VALUES (8,'hu','ln','Képek'); INSERT INTO collectionname VALUES (8,'af','ln','Prente'); INSERT INTO collectionname VALUES (8,'gl','ln','Imaxes'); INSERT INTO collectionname VALUES (8,'ro','ln','Poze'); INSERT INTO collectionname VALUES (8,'rw','ln','Ifoto'); INSERT INTO collectionname VALUES (8,'ka','ln','სურათები'); INSERT INTO collectionname VALUES (8,'lt','ln','Paveikslėliai'); INSERT INTO collectionname VALUES (8,'ar','ln','صور'); INSERT INTO collectionname VALUES (9,'en','ln','CERN Divisions'); INSERT INTO collectionname VALUES (9,'fr','ln','Divisions du CERN'); INSERT INTO collectionname VALUES (9,'de','ln','Abteilungen des CERN'); INSERT INTO collectionname VALUES (9,'es','ln','Divisiones del CERN'); INSERT INTO collectionname VALUES (9,'ca','ln','Divisions del CERN'); INSERT INTO collectionname VALUES (9,'pl','ln','Działy CERN'); INSERT INTO collectionname VALUES 
(9,'pt','ln','Divisões do CERN'); INSERT INTO collectionname VALUES (9,'it','ln','Divisioni del CERN'); INSERT INTO collectionname VALUES (9,'ru','ln','Разделения CERNа'); INSERT INTO collectionname VALUES (9,'sk','ln','Oddelenia CERNu'); INSERT INTO collectionname VALUES (9,'cs','ln','Oddělení CERNu'); INSERT INTO collectionname VALUES (9,'no','ln','Divisjoner ved CERN'); INSERT INTO collectionname VALUES (9,'sv','ln',''); INSERT INTO collectionname VALUES (9,'el','ln','Τομείς του CERN'); INSERT INTO collectionname VALUES (9,'uk','ln','Підрозділи CERN'); INSERT INTO collectionname VALUES (9,'ja','ln','CERN 部'); INSERT INTO collectionname VALUES (9,'bg','ln','Отдели в CERN'); INSERT INTO collectionname VALUES (9,'hr','ln','Odjeli CERN-a'); INSERT INTO collectionname VALUES (9,'zh_CN','ln','CERN 分类'); INSERT INTO collectionname VALUES (9,'zh_TW','ln','CERN 分類'); INSERT INTO collectionname VALUES (9,'hu','ln','CERN részlegek'); INSERT INTO collectionname VALUES (9,'af','ln','CERN Afdelings'); INSERT INTO collectionname VALUES (9,'gl','ln','Divisións do CERN'); INSERT INTO collectionname VALUES (9,'ro','ln','Divizii CERN'); INSERT INTO collectionname VALUES (9,'rw','ln','Ishami ya CERN'); INSERT INTO collectionname VALUES (9,'ka','ln','ცერნის განყოფილებები'); INSERT INTO collectionname VALUES (9,'lt','ln','CERN Padaliniai'); INSERT INTO collectionname VALUES (9,'ar','ln','أقسام المنظمة الأوربية للبحوث النووية'); INSERT INTO collectionname VALUES (10,'en','ln','CERN Experiments'); INSERT INTO collectionname VALUES (10,'fr','ln','Expériences du CERN'); INSERT INTO collectionname VALUES (10,'de','ln','Experimente des CERN'); INSERT INTO collectionname VALUES (10,'es','ln','Experimentos del CERN'); INSERT INTO collectionname VALUES (10,'ca','ln','Experiments del CERN'); INSERT INTO collectionname VALUES (10,'pl','ln','Eksperymenty CERN'); INSERT INTO collectionname VALUES (10,'pt','ln','Experimentos do CERN'); INSERT INTO collectionname VALUES (10,'it','ln','Esperimenti del CERN'); INSERT INTO collectionname VALUES (10,'ru','ln','Эксперименты CERNа'); INSERT INTO collectionname VALUES (10,'sk','ln','Experimenty CERNu'); INSERT INTO collectionname VALUES (10,'cs','ln','Experimenty CERNu'); INSERT INTO collectionname VALUES (10,'no','ln','Eksperimenter ved CERN'); INSERT INTO collectionname VALUES (10,'sv','ln',''); INSERT INTO collectionname VALUES (10,'el','ln','Πειράματα του CERN'); INSERT INTO collectionname VALUES (10,'uk','ln','Експерименти CERN'); INSERT INTO collectionname VALUES (10,'ja','ln','CERN の実験'); INSERT INTO collectionname VALUES (10,'bg','ln','Експерименти в CERN'); INSERT INTO collectionname VALUES (10,'hr','ln','Eksperimenti CERN-a'); INSERT INTO collectionname VALUES (10,'zh_CN','ln','CERN 实验'); INSERT INTO collectionname VALUES (10,'zh_TW','ln','CERN 實驗'); INSERT INTO collectionname VALUES (10,'hu','ln','CERN kísérletek'); INSERT INTO collectionname VALUES (10,'af','ln','CERN Experimente'); INSERT INTO collectionname VALUES (10,'gl','ln','Experimentos do CERN'); INSERT INTO collectionname VALUES (10,'ro','ln','Experimente CERN'); INSERT INTO collectionname VALUES (10,'rw','ln','Ubushakashatsi bwa CERN'); INSERT INTO collectionname VALUES (10,'ka','ln','ცერნის ექსპერემენტები'); INSERT INTO collectionname VALUES (10,'lt','ln','CERN Eksperimentai'); INSERT INTO collectionname VALUES (10,'ar','ln','تجارب المنظمة الأوربية للبحوث النووية'); INSERT INTO collectionname VALUES (11,'en','ln','Theoretical Physics (TH)'); INSERT INTO collectionname VALUES (11,'fr','ln','Physique 
Théorique (TH)'); INSERT INTO collectionname VALUES (11,'de','ln','Theoretische Physik (TH)'); INSERT INTO collectionname VALUES (11,'es','ln','Física teórica (TH)'); INSERT INTO collectionname VALUES (11,'ca','ln','Física teòrica (TH)'); INSERT INTO collectionname VALUES (11,'pl','ln','Fizyka Teoretyczna (TH)'); INSERT INTO collectionname VALUES (11,'pt','ln','Física Teórica (TH)'); INSERT INTO collectionname VALUES (11,'it','ln','Fisica Teorica (TH)'); INSERT INTO collectionname VALUES (11,'ru','ln','Теоретическая физика (TH)'); INSERT INTO collectionname VALUES (11,'sk','ln','Teoretická fyzika (TH)'); INSERT INTO collectionname VALUES (11,'cs','ln','Teoretická fyzika (TH)'); INSERT INTO collectionname VALUES (11,'no','ln','Teoretisk fysikk (TH)'); INSERT INTO collectionname VALUES (11,'sv','ln',''); INSERT INTO collectionname VALUES (11,'el','ln','Θεωρητική Φυσική (TH)'); INSERT INTO collectionname VALUES (11,'uk','ln','Теоретична фізика (TH)'); INSERT INTO collectionname VALUES (11,'ja','ln','理論的な物理学 (TH)'); INSERT INTO collectionname VALUES (11,'bg','ln','Теоретична физика (TH)'); INSERT INTO collectionname VALUES (11,'hr','ln','Teorijska fizika (TH)'); INSERT INTO collectionname VALUES (11,'zh_CN','ln','理论物理 (TH)'); INSERT INTO collectionname VALUES (11,'zh_TW','ln','理論物理 (TH)'); INSERT INTO collectionname VALUES (11,'hu','ln','Elméleti fizika (TH)'); INSERT INTO collectionname VALUES (11,'af','ln','Teoretiese Fisika (TH)'); INSERT INTO collectionname VALUES (11,'gl','ln','Física Teórica (TH)'); INSERT INTO collectionname VALUES (11,'ro','ln','Fizică Teoretică (TH)'); INSERT INTO collectionname VALUES (11,'rw','ln','Theoretical Physics (TH)'); INSERT INTO collectionname VALUES (11,'ka','ln','თეორიული ფიზიკა (თფ)'); INSERT INTO collectionname VALUES (11,'lt','ln','Teorinė fizika (TH)'); INSERT INTO collectionname VALUES (11,'ar','ln','الفيزياء النظرية'); INSERT INTO collectionname VALUES (12,'en','ln','Experimental Physics (EP)'); INSERT INTO collectionname VALUES (12,'fr','ln','Physique Expérimentale (EP)'); INSERT INTO collectionname VALUES (12,'de','ln','Experimentelle Physik (EP)'); INSERT INTO collectionname VALUES (12,'es','ln','Física experimental (FE)'); INSERT INTO collectionname VALUES (12,'ca','ln','Física experimental (EP)'); INSERT INTO collectionname VALUES (12,'pl','ln','Fizyka Doświadczalna (EP)'); INSERT INTO collectionname VALUES (12,'pt','ln','Física Experimental (EP)'); INSERT INTO collectionname VALUES (12,'it','ln','Fisica Sperimentale (EP)'); INSERT INTO collectionname VALUES (12,'ru','ln','Экспериментальная Физика (EP)'); INSERT INTO collectionname VALUES (12,'sk','ln','Experimentálna fyzika (EP)'); INSERT INTO collectionname VALUES (12,'cs','ln','Experimentální fyzika (EP)'); INSERT INTO collectionname VALUES (12,'no','ln','Eksperimentell fysikk (EP)'); INSERT INTO collectionname VALUES (12,'sv','ln',''); INSERT INTO collectionname VALUES (12,'el','ln','Πειραματική Φυσική (EP)'); INSERT INTO collectionname VALUES (12,'uk','ln','Експериментальна фізика (EP)'); INSERT INTO collectionname VALUES (12,'ja','ln','実験物理学 (EP)'); INSERT INTO collectionname VALUES (12,'bg','ln','Експериментална физика (EP)'); INSERT INTO collectionname VALUES (12,'hr','ln','Eksperimentalna fizika (EP)'); INSERT INTO collectionname VALUES (12,'zh_CN','ln','实验物理 (EP)'); INSERT INTO collectionname VALUES (12,'zh_TW','ln','實驗物理 (EP)'); INSERT INTO collectionname VALUES (12,'hu','ln','Kísérleti fizika (EP)'); INSERT INTO collectionname VALUES (12,'af','ln','Eksperimentele Fisika (EP)'); 
INSERT INTO collectionname VALUES (12,'gl','ln','Física Experimental (EP)'); INSERT INTO collectionname VALUES (12,'ro','ln','Fizică Experimentală (EP)'); INSERT INTO collectionname VALUES (12,'rw','ln','Experimental Physics (EP)'); INSERT INTO collectionname VALUES (12,'ka','ln','ექსპერიმენტული ფიზიკა (ეფ)'); INSERT INTO collectionname VALUES (12,'lt','ln','Eksperimentinė fizika (EP)'); INSERT INTO collectionname VALUES (12,'ar','ln','الفيزياء التجريبية'); INSERT INTO collectionname VALUES (13,'en','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'fr','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'de','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'es','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ca','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'pl','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'pt','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'it','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ru','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'sk','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'cs','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'no','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'sv','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'el','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'uk','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ja','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'bg','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'hr','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'zh_CN','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'zh_TW','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'hu','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'af','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'gl','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ro','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'rw','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ka','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'lt','ln','ISOLDE'); INSERT INTO collectionname VALUES (13,'ar','ln','ISOLDE'); INSERT INTO collectionname VALUES (14,'en','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'fr','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'de','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'es','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ca','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'pl','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'pt','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'it','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ru','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'sk','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'cs','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'no','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'sv','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'el','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'uk','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ja','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'bg','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'hr','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'zh_CN','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'zh_TW','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'hu','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'af','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'gl','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ro','ln','ALEPH'); INSERT INTO collectionname VALUES 
(14,'rw','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ka','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'lt','ln','ALEPH'); INSERT INTO collectionname VALUES (14,'ar','ln','ALEPH'); INSERT INTO collectionname VALUES (15,'en','ln','Articles & Preprints'); INSERT INTO collectionname VALUES (15,'fr','ln','Articles et Prétirages'); INSERT INTO collectionname VALUES (15,'de','ln','Artikel & Preprints'); INSERT INTO collectionname VALUES (15,'es','ln','Articulos y preprints'); INSERT INTO collectionname VALUES (15,'ca','ln','Articles i preprints'); INSERT INTO collectionname VALUES (15,'pl','ln','Artykuły i Preprinty'); INSERT INTO collectionname VALUES (15,'pt','ln','Artigos e Preprints'); INSERT INTO collectionname VALUES (15,'it','ln','Articoli e Preprint'); INSERT INTO collectionname VALUES (15,'ru','ln','Статьи и Препринты'); INSERT INTO collectionname VALUES (15,'sk','ln','Články a Preprinty'); INSERT INTO collectionname VALUES (15,'cs','ln','Články a Preprinty'); INSERT INTO collectionname VALUES (15,'no','ln','Artikler og Førtrykk'); INSERT INTO collectionname VALUES (15,'sv','ln',''); INSERT INTO collectionname VALUES (15,'el','ln',"Άρθρα & Προδημοσιεύσεις"); INSERT INTO collectionname VALUES (15,'uk','ln','Статті та Препринти'); INSERT INTO collectionname VALUES (15,'ja','ln','記事及びプレプリント'); INSERT INTO collectionname VALUES (15,'bg','ln','Статии и Препринти'); INSERT INTO collectionname VALUES (15,'hr','ln','Članci i Preprinti'); INSERT INTO collectionname VALUES (15,'zh_CN','ln','文章和预印'); INSERT INTO collectionname VALUES (15,'zh_TW','ln','文章和預印'); INSERT INTO collectionname VALUES (15,'hu','ln','Cikkek és Preprintek'); INSERT INTO collectionname VALUES (15,'af','ln','Artikels & Pre-drukke'); INSERT INTO collectionname VALUES (15,'gl','ln','Artigos e Preprints'); INSERT INTO collectionname VALUES (15,'ro','ln','Articole şi Preprinturi'); INSERT INTO collectionname VALUES (15,'rw','ln','Ibinyamakuru'); INSERT INTO collectionname VALUES (15,'ka','ln','სტატიები და პრეპრინტები'); INSERT INTO collectionname VALUES (15,'lt','ln','Straipsniai ir Rankraščiai'); INSERT INTO collectionname VALUES (15,'ar','ln','مقالات & مسودات'); INSERT INTO collectionname VALUES (16,'en','ln','Books & Reports'); INSERT INTO collectionname VALUES (16,'fr','ln','Livres et Rapports'); INSERT INTO collectionname VALUES (16,'de','ln','Monographien & Reports'); INSERT INTO collectionname VALUES (16,'es','ln','Libros e informes'); INSERT INTO collectionname VALUES (16,'ca','ln','Llibres i informes'); INSERT INTO collectionname VALUES (16,'pl','ln','Książki i Raporty'); INSERT INTO collectionname VALUES (16,'pt','ln','Livros e Relatórios'); INSERT INTO collectionname VALUES (16,'it','ln','Libri e Rapporti'); INSERT INTO collectionname VALUES (16,'ru','ln','Книги и Рапорты'); INSERT INTO collectionname VALUES (16,'sk','ln','Knihy a Správy'); INSERT INTO collectionname VALUES (16,'cs','ln','Knihy a Zprávy'); INSERT INTO collectionname VALUES (16,'no','ln','Bøker og Rapporter'); INSERT INTO collectionname VALUES (16,'sv','ln',''); INSERT INTO collectionname VALUES (16,'el','ln','Βιβλία & Αναφορές'); INSERT INTO collectionname VALUES (16,'uk','ln','Книги та Звіти'); INSERT INTO collectionname VALUES (16,'ja','ln','本及びレポート'); INSERT INTO collectionname VALUES (16,'bg','ln','Книги и Доклади'); INSERT INTO collectionname VALUES (16,'hr','ln','Knjige i Izvještaji'); INSERT INTO collectionname VALUES (16,'zh_CN','ln','书本和报告'); INSERT INTO collectionname VALUES (16,'zh_TW','ln','書本和報告'); INSERT INTO 
collectionname VALUES (16,'hu','ln','Könyvek és tanulmányok'); INSERT INTO collectionname VALUES (16,'af','ln','Boeke & Verslae'); INSERT INTO collectionname VALUES (16,'gl','ln','Libros e Informes'); INSERT INTO collectionname VALUES (16,'ro','ln','Cărţi şi Rapoarte'); INSERT INTO collectionname VALUES (16,'rw','ln','Ibitabo & Raporo'); INSERT INTO collectionname VALUES (16,'ka','ln','წიგნები და მოხსენებები'); INSERT INTO collectionname VALUES (16,'lt','ln','Knygos ir Pranešimai'); INSERT INTO collectionname VALUES (16,'ar','ln','كتب & تقارير'); INSERT INTO collectionname VALUES (17,'en','ln','Multimedia & Arts'); INSERT INTO collectionname VALUES (17,'fr','ln','Multimédia et Arts'); INSERT INTO collectionname VALUES (17,'de','ln','Multimedia & Kunst'); INSERT INTO collectionname VALUES (17,'es','ln','Multimedia y artes'); INSERT INTO collectionname VALUES (17,'ca','ln','Multimèdia i arts'); INSERT INTO collectionname VALUES (17,'pl','ln','Multimedia i Sztuka'); INSERT INTO collectionname VALUES (17,'pt','ln','Multimédia e Artes'); INSERT INTO collectionname VALUES (17,'it','ln','Multimedia e Arti'); INSERT INTO collectionname VALUES (17,'ru','ln','Мультимедиа и Исскуство'); INSERT INTO collectionname VALUES (17,'sk','ln','Multimédia a Umenie'); INSERT INTO collectionname VALUES (17,'cs','ln','Multimédia a Umění'); INSERT INTO collectionname VALUES (17,'no','ln','Multimedia og Grafikk'); INSERT INTO collectionname VALUES (17,'sv','ln',''); INSERT INTO collectionname VALUES (17,'el','ln','Πολυμέσα & Τέχνες'); INSERT INTO collectionname VALUES (17,'uk','ln','Мультимедіа та Мистецтво'); INSERT INTO collectionname VALUES (17,'ja','ln','マルチメディア及び芸術'); INSERT INTO collectionname VALUES (17,'bg','ln','Мултимедия и Изкуства'); INSERT INTO collectionname VALUES (17,'hr','ln','Multimedija i Umjetnost'); INSERT INTO collectionname VALUES (17,'zh_CN','ln','多媒体和艺术'); INSERT INTO collectionname VALUES (17,'zh_TW','ln','多媒體和藝術'); INSERT INTO collectionname VALUES (17,'hu','ln','Multimédia és képzőművészet'); INSERT INTO collectionname VALUES (17,'af','ln','Multimedia & Kunste'); INSERT INTO collectionname VALUES (17,'gl','ln','Multimedia e Arte'); INSERT INTO collectionname VALUES (17,'ro','ln','Multimedia şi Arte'); INSERT INTO collectionname VALUES (17,'rw','ln','Multimedia & Arts'); INSERT INTO collectionname VALUES (17,'ka','ln','მულტიმედია და ხელოვნება'); INSERT INTO collectionname VALUES (17,'lt','ln','Multimedija ir Menas'); INSERT INTO collectionname VALUES (17,'ar','ln','وسائط متعددة & فنون'); INSERT INTO collectionname VALUES (18,'en','ln','Poetry'); INSERT INTO collectionname VALUES (18,'fr','ln','Poésie'); INSERT INTO collectionname VALUES (18,'de','ln','Poesie'); INSERT INTO collectionname VALUES (18,'es','ln','Poesía'); INSERT INTO collectionname VALUES (18,'ca','ln','Poesia'); INSERT INTO collectionname VALUES (18,'pl','ln','Poezja'); INSERT INTO collectionname VALUES (18,'pt','ln','Poesia'); INSERT INTO collectionname VALUES (18,'it','ln','Poesia'); INSERT INTO collectionname VALUES (18,'ru','ln','Поэзия'); INSERT INTO collectionname VALUES (18,'sk','ln','Poézia'); INSERT INTO collectionname VALUES (18,'cs','ln','Poezie'); INSERT INTO collectionname VALUES (18,'no','ln','Poesi'); INSERT INTO collectionname VALUES (18,'sv','ln',''); INSERT INTO collectionname VALUES (18,'el','ln','Ποίηση'); INSERT INTO collectionname VALUES (18,'uk','ln','Поезія'); INSERT INTO collectionname VALUES (18,'ja','ln','詩歌'); INSERT INTO collectionname VALUES (18,'bg','ln','Поезия'); INSERT INTO collectionname 
VALUES (18,'hr','ln','Poezija'); INSERT INTO collectionname VALUES (18,'zh_CN','ln','诗歌'); INSERT INTO collectionname VALUES (18,'zh_TW','ln','詩歌'); INSERT INTO collectionname VALUES (18,'hu','ln','Költészet'); INSERT INTO collectionname VALUES (18,'af','ln','Poësie'); INSERT INTO collectionname VALUES (18,'gl','ln','Poesía'); INSERT INTO collectionname VALUES (18,'ro','ln','Poezie'); INSERT INTO collectionname VALUES (18,'rw','ln','Umuvugo'); INSERT INTO collectionname VALUES (18,'ka','ln','პოეზია'); INSERT INTO collectionname VALUES (18,'lt','ln','Poezija'); INSERT INTO collectionname VALUES (18,'ar','ln','شعر'); INSERT INTO collectionname VALUES (19,'en','ln','Atlantis Times News'); INSERT INTO collectionname VALUES (19,'fr','ln','Atlantis Times Actualités'); INSERT INTO collectionname VALUES (20,'en','ln','Atlantis Times Arts'); INSERT INTO collectionname VALUES (20,'fr','ln','Atlantis Times Arts'); INSERT INTO collectionname VALUES (21,'en','ln','Atlantis Times Science'); INSERT INTO collectionname VALUES (21,'fr','ln','Atlantis Times Science'); INSERT INTO collectionname VALUES (22,'en','ln','Atlantis Times'); INSERT INTO collectionname VALUES (22,'fr','ln','Atlantis Times'); INSERT INTO collectionname VALUES (23,'en','ln','Atlantis Institute Books'); INSERT INTO collectionname VALUES (23,'fr','ln','Atlantis Institute Books'); INSERT INTO collectionname VALUES (24,'en','ln','Atlantis Institute Articles'); INSERT INTO collectionname VALUES (24,'fr','ln','Atlantis Institute Articles'); INSERT INTO collectionname VALUES (25,'en','ln','Atlantis Times Drafts'); INSERT INTO collectionname VALUES (25,'fr','ln','Atlantis Times Ébauches'); INSERT INTO collection_collection VALUES (1,15,'r',60); INSERT INTO collection_collection VALUES (1,16,'r',40); INSERT INTO collection_collection VALUES (1,17,'r',30); -- INSERT INTO collection_collection VALUES (1,23,'r',20); -- INSERT INTO collection_collection VALUES (1,24,'r',10); INSERT INTO collection_collection VALUES (15,6,'r',20); INSERT INTO collection_collection VALUES (15,2,'r',10); INSERT INTO collection_collection VALUES (16,3,'r',30); INSERT INTO collection_collection VALUES (16,4,'r',20); INSERT INTO collection_collection VALUES (16,5,'r',10); INSERT INTO collection_collection VALUES (17,8,'r',30); INSERT INTO collection_collection VALUES (17,18,'r',20); INSERT INTO collection_collection VALUES (17,22,'r',10); INSERT INTO collection_collection VALUES (22,19,'r',30); INSERT INTO collection_collection VALUES (22,20,'r',20); INSERT INTO collection_collection VALUES (22,21,'r',10); INSERT INTO collection_collection VALUES (1,9,'v',20); INSERT INTO collection_collection VALUES (1,10,'v',10); INSERT INTO collection_collection VALUES (9,11,'r',10); INSERT INTO collection_collection VALUES (9,12,'r',20); INSERT INTO collection_collection VALUES (10,13,'r',10); INSERT INTO collection_collection VALUES (10,14,'r',20); INSERT INTO collection_example VALUES (1,1,1); INSERT INTO collection_example VALUES (1,5,2); INSERT INTO collection_example VALUES (1,8,3); INSERT INTO collection_example VALUES (1,7,5); INSERT INTO collection_example VALUES (1,6,4); INSERT INTO collection_example VALUES (1,4,6); INSERT INTO collection_example VALUES (1,3,7); INSERT INTO collection_example VALUES (1,13,50); INSERT INTO collection_example VALUES (1,2,8); INSERT INTO collection_example VALUES (2,1,1); INSERT INTO collection_example VALUES (2,5,2); INSERT INTO collection_example VALUES (2,8,3); INSERT INTO collection_example VALUES (2,7,5); INSERT INTO collection_example 
VALUES (2,6,4); INSERT INTO collection_example VALUES (2,4,6); INSERT INTO collection_example VALUES (2,3,7); INSERT INTO collection_example VALUES (2,2,8); INSERT INTO collection_example VALUES (3,6,30); INSERT INTO collection_example VALUES (3,17,10); INSERT INTO collection_example VALUES (3,18,20); INSERT INTO collection_example VALUES (4,1,1); INSERT INTO collection_example VALUES (4,5,2); INSERT INTO collection_example VALUES (4,8,3); INSERT INTO collection_example VALUES (4,7,5); INSERT INTO collection_example VALUES (4,6,4); INSERT INTO collection_example VALUES (4,4,6); INSERT INTO collection_example VALUES (4,3,7); INSERT INTO collection_example VALUES (4,2,8); INSERT INTO collection_example VALUES (5,1,1); INSERT INTO collection_example VALUES (5,5,2); INSERT INTO collection_example VALUES (5,8,3); INSERT INTO collection_example VALUES (5,7,5); INSERT INTO collection_example VALUES (5,6,4); INSERT INTO collection_example VALUES (5,4,6); INSERT INTO collection_example VALUES (5,3,7); INSERT INTO collection_example VALUES (5,2,8); INSERT INTO collection_example VALUES (6,1,10); INSERT INTO collection_example VALUES (6,5,20); INSERT INTO collection_example VALUES (6,8,30); INSERT INTO collection_example VALUES (6,0,27); INSERT INTO collection_example VALUES (6,4,40); INSERT INTO collection_example VALUES (6,3,60); INSERT INTO collection_example VALUES (6,2,80); INSERT INTO collection_example VALUES (8,14,10); INSERT INTO collection_example VALUES (8,15,20); INSERT INTO collection_example VALUES (8,16,30); INSERT INTO collection_example VALUES (15,0,27); INSERT INTO collection_example VALUES (15,1,1); INSERT INTO collection_example VALUES (15,2,8); INSERT INTO collection_example VALUES (15,3,60); INSERT INTO collection_example VALUES (15,4,40); INSERT INTO collection_example VALUES (15,5,2); INSERT INTO collection_example VALUES (15,6,4); INSERT INTO collection_example VALUES (15,7,5); INSERT INTO collection_example VALUES (15,8,3); INSERT INTO collection_example VALUES (16,1,1); INSERT INTO collection_example VALUES (16,2,8); INSERT INTO collection_example VALUES (16,3,7); INSERT INTO collection_example VALUES (16,4,6); INSERT INTO collection_example VALUES (16,5,2); INSERT INTO collection_example VALUES (16,6,4); INSERT INTO collection_example VALUES (16,7,5); INSERT INTO collection_example VALUES (16,8,3); INSERT INTO collection_example VALUES (17,14,10); INSERT INTO collection_example VALUES (17,15,20); INSERT INTO collection_example VALUES (17,16,30); INSERT INTO collection_example VALUES (1,19,0); INSERT INTO collection_example VALUES (15,19,0); INSERT INTO collection_example VALUES (16,19,0); INSERT INTO collection_field_fieldvalue VALUES (2,7,7,'seo',10,18); INSERT INTO collection_field_fieldvalue VALUES (2,7,6,'seo',10,19); INSERT INTO collection_field_fieldvalue VALUES (2,7,5,'seo',10,20); INSERT INTO collection_field_fieldvalue VALUES (2,7,4,'seo',10,21); INSERT INTO collection_field_fieldvalue VALUES (6,7,1,'seo',2,24); INSERT INTO collection_field_fieldvalue VALUES (6,7,2,'seo',2,23); INSERT INTO collection_field_fieldvalue VALUES (6,7,3,'seo',2,22); INSERT INTO collection_field_fieldvalue VALUES (6,7,4,'seo',2,21); INSERT INTO collection_field_fieldvalue VALUES (6,7,5,'seo',2,20); INSERT INTO collection_field_fieldvalue VALUES (6,7,6,'seo',2,19); INSERT INTO collection_field_fieldvalue VALUES (6,7,7,'seo',2,18); INSERT INTO collection_field_fieldvalue VALUES (6,7,8,'seo',2,17); INSERT INTO collection_field_fieldvalue VALUES (6,7,9,'seo',2,16); INSERT INTO 
collection_field_fieldvalue VALUES (6,7,10,'seo',2,15); INSERT INTO collection_field_fieldvalue VALUES (6,7,11,'seo',2,14); INSERT INTO collection_field_fieldvalue VALUES (6,7,12,'seo',2,13); INSERT INTO collection_field_fieldvalue VALUES (6,7,13,'seo',2,12); INSERT INTO collection_field_fieldvalue VALUES (6,7,14,'seo',2,11); INSERT INTO collection_field_fieldvalue VALUES (6,7,15,'seo',2,10); INSERT INTO collection_field_fieldvalue VALUES (6,7,16,'seo',2,9); INSERT INTO collection_field_fieldvalue VALUES (6,7,17,'seo',2,8); INSERT INTO collection_field_fieldvalue VALUES (6,7,18,'seo',2,7); INSERT INTO collection_field_fieldvalue VALUES (6,7,19,'seo',2,6); INSERT INTO collection_field_fieldvalue VALUES (6,7,20,'seo',2,5); INSERT INTO collection_field_fieldvalue VALUES (6,7,21,'seo',2,4); INSERT INTO collection_field_fieldvalue VALUES (6,7,22,'seo',2,3); INSERT INTO collection_field_fieldvalue VALUES (6,7,23,'seo',2,2); INSERT INTO collection_field_fieldvalue VALUES (6,7,24,'seo',2,1); INSERT INTO collection_field_fieldvalue VALUES (2,7,3,'seo',10,22); INSERT INTO collection_field_fieldvalue VALUES (2,7,2,'seo',10,23); INSERT INTO collection_field_fieldvalue VALUES (6,8,NULL,'sew',2,0); INSERT INTO collection_field_fieldvalue VALUES (2,7,1,'seo',10,24); INSERT INTO collection_field_fieldvalue VALUES (6,4,NULL,'sew',4,70); INSERT INTO collection_field_fieldvalue VALUES (6,2,NULL,'sew',3,70); INSERT INTO collection_field_fieldvalue VALUES (6,19,NULL,'sew',3,65); INSERT INTO collection_field_fieldvalue VALUES (6,5,NULL,'sew',1,70); INSERT INTO collection_field_fieldvalue VALUES (6,11,25,'seo',1,1); INSERT INTO collection_field_fieldvalue VALUES (6,11,26,'seo',1,2); INSERT INTO collection_field_fieldvalue VALUES (8,7,27,'seo',10,3); INSERT INTO collection_field_fieldvalue VALUES (8,7,28,'seo',10,1); INSERT INTO collection_field_fieldvalue VALUES (8,7,29,'seo',10,4); INSERT INTO collection_field_fieldvalue VALUES (8,7,30,'seo',10,2); INSERT INTO collection_field_fieldvalue VALUES (6,3,NULL,'sew',5,70); INSERT INTO collection_field_fieldvalue VALUES (2,7,8,'seo',10,17); INSERT INTO collection_field_fieldvalue VALUES (2,7,9,'seo',10,16); INSERT INTO collection_field_fieldvalue VALUES (2,7,10,'seo',10,15); INSERT INTO collection_field_fieldvalue VALUES (2,7,11,'seo',10,14); INSERT INTO collection_field_fieldvalue VALUES (2,7,12,'seo',10,13); INSERT INTO collection_field_fieldvalue VALUES (2,7,13,'seo',10,12); INSERT INTO collection_field_fieldvalue VALUES (2,7,14,'seo',10,11); INSERT INTO collection_field_fieldvalue VALUES (2,7,15,'seo',10,10); INSERT INTO collection_field_fieldvalue VALUES (2,7,16,'seo',10,9); INSERT INTO collection_field_fieldvalue VALUES (2,7,17,'seo',10,8); INSERT INTO collection_field_fieldvalue VALUES (2,7,18,'seo',10,7); INSERT INTO collection_field_fieldvalue VALUES (2,7,19,'seo',10,6); INSERT INTO collection_field_fieldvalue VALUES (2,7,20,'seo',10,5); INSERT INTO collection_field_fieldvalue VALUES (2,7,21,'seo',10,4); INSERT INTO collection_field_fieldvalue VALUES (2,7,22,'seo',10,3); INSERT INTO collection_field_fieldvalue VALUES (2,7,23,'seo',10,2); INSERT INTO collection_field_fieldvalue VALUES (2,7,24,'seo',10,1); INSERT INTO collection_field_fieldvalue VALUES (2,8,NULL,'sew',20,0); INSERT INTO collection_field_fieldvalue VALUES (2,4,NULL,'sew',40,70); INSERT INTO collection_field_fieldvalue VALUES (2,2,NULL,'sew',60,70); INSERT INTO collection_field_fieldvalue VALUES (2,5,NULL,'sew',30,70); INSERT INTO collection_field_fieldvalue VALUES (2,11,26,'seo',5,1); INSERT 
INTO collection_field_fieldvalue VALUES (2,3,NULL,'sew',50,70); INSERT INTO collection_field_fieldvalue VALUES (2,11,25,'seo',5,2); INSERT INTO collection_field_fieldvalue VALUES (2,11,32,'seo',5,0); INSERT INTO collection_field_fieldvalue VALUES (3,2,NULL,'sew',10,0); INSERT INTO collection_field_fieldvalue VALUES (3,3,NULL,'sew',20,0); INSERT INTO collection_field_fieldvalue VALUES (3,12,NULL,'sew',30,0); INSERT INTO collection_field_fieldvalue VALUES (4,4,NULL,'sew',30,0); INSERT INTO collection_field_fieldvalue VALUES (4,3,NULL,'sew',40,0); INSERT INTO collection_field_fieldvalue VALUES (4,12,NULL,'sew',10,0); INSERT INTO collection_field_fieldvalue VALUES (4,2,NULL,'sew',50,0); INSERT INTO collection_field_fieldvalue VALUES (4,6,NULL,'sew',20,0); INSERT INTO collection_field_fieldvalue VALUES (4,7,NULL,'seo',10,0); INSERT INTO collection_field_fieldvalue VALUES (4,7,12,'seo',10,2); INSERT INTO collection_field_fieldvalue VALUES (4,7,8,'seo',10,3); INSERT INTO collection_field_fieldvalue VALUES (4,7,10,'seo',10,1); INSERT INTO collection_field_fieldvalue VALUES (5,6,NULL,'sew',20,0); INSERT INTO collection_field_fieldvalue VALUES (5,12,NULL,'sew',10,0); INSERT INTO collection_field_fieldvalue VALUES (5,4,NULL,'sew',30,0); INSERT INTO collection_field_fieldvalue VALUES (5,3,NULL,'sew',40,0); INSERT INTO collection_field_fieldvalue VALUES (5,2,NULL,'sew',50,0); INSERT INTO collection_field_fieldvalue VALUES (5,7,NULL,'seo',10,0); INSERT INTO collection_field_fieldvalue VALUES (5,7,9,'seo',10,3); INSERT INTO collection_field_fieldvalue VALUES (5,7,12,'seo',10,2); INSERT INTO collection_field_fieldvalue VALUES (8,6,NULL,'sew',10,0); INSERT INTO collection_field_fieldvalue VALUES (8,2,NULL,'sew',50,0); INSERT INTO collection_field_fieldvalue VALUES (8,3,NULL,'sew',40,0); INSERT INTO collection_field_fieldvalue VALUES (8,5,NULL,'sew',20,0); INSERT INTO collection_field_fieldvalue VALUES (8,4,NULL,'sew',30,0); INSERT INTO collection_field_fieldvalue VALUES (1,2,NULL,'soo',40,0); INSERT INTO collection_field_fieldvalue VALUES (1,3,NULL,'soo',30,0); INSERT INTO collection_field_fieldvalue VALUES (1,6,NULL,'soo',20,0); INSERT INTO collection_field_fieldvalue VALUES (1,12,NULL,'soo',10,0); INSERT INTO collection_field_fieldvalue VALUES (3,2,NULL,'soo',40,0); INSERT INTO collection_field_fieldvalue VALUES (3,3,NULL,'soo',30,0); INSERT INTO collection_field_fieldvalue VALUES (3,15,NULL,'soo',20,0); INSERT INTO collection_field_fieldvalue VALUES (3,12,NULL,'soo',10,0); INSERT INTO collection_format VALUES (6,1,100); INSERT INTO collection_format VALUES (6,2,90); INSERT INTO collection_format VALUES (6,3,80); INSERT INTO collection_format VALUES (6,4,70); INSERT INTO collection_format VALUES (6,5,60); INSERT INTO collection_format VALUES (2,1,100); INSERT INTO collection_format VALUES (2,2,90); INSERT INTO collection_format VALUES (2,3,80); INSERT INTO collection_format VALUES (2,4,70); INSERT INTO collection_format VALUES (2,5,60); INSERT INTO collection_format VALUES (3,1,100); INSERT INTO collection_format VALUES (3,2,90); INSERT INTO collection_format VALUES (3,3,80); INSERT INTO collection_format VALUES (3,4,70); INSERT INTO collection_format VALUES (3,5,60); INSERT INTO collection_format VALUES (4,1,100); INSERT INTO collection_format VALUES (4,2,90); INSERT INTO collection_format VALUES (4,3,80); INSERT INTO collection_format VALUES (4,4,70); INSERT INTO collection_format VALUES (4,5,60); INSERT INTO collection_format VALUES (5,1,100); INSERT INTO collection_format VALUES (5,2,90); INSERT INTO 
collection_format VALUES (5,3,80); INSERT INTO collection_format VALUES (5,4,70); INSERT INTO collection_format VALUES (5,5,60); INSERT INTO collection_format VALUES (8,1,100); INSERT INTO collection_format VALUES (8,2,90); INSERT INTO collection_format VALUES (8,3,80); INSERT INTO collection_format VALUES (8,4,70); INSERT INTO collection_format VALUES (8,5,60); INSERT INTO collection_format VALUES (8,6,96); INSERT INTO collection_format VALUES (8,7,93); INSERT INTO collection_format VALUES (1,1,100); INSERT INTO collection_format VALUES (1,2,90); INSERT INTO collection_format VALUES (1,3,80); INSERT INTO collection_format VALUES (1,4,70); INSERT INTO collection_format VALUES (1,5,60); INSERT INTO collection_format VALUES (15,1,100); INSERT INTO collection_format VALUES (15,2,90); INSERT INTO collection_format VALUES (15,18,85); INSERT INTO collection_format VALUES (15,3,80); INSERT INTO collection_format VALUES (15,4,70); INSERT INTO collection_format VALUES (15,5,60); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,1,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,2,'en','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (6,3,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (6,49,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (6,4,'en','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (2,5,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (2,45,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (2,6,'en','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (3,7,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (3,46,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (3,8,'en','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (4,9,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (4,47,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (4,10,'en','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (5,11,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (5,48,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (8,14,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (8,50,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (9,15,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (10,16,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (11,17,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (12,18,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (13,19,'en','rt',100); INSERT INTO collection_portalbox 
(id_collection,id_portalbox,ln,position,score) VALUES (14,20,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (15,21,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (15,51,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (16,22,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (16,52,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (17,23,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (17,53,'en','rt',95); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (18,24,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,25,'fr','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,26,'fr','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,27,'sk','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,28,'sk','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,29,'cs','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,30,'cs','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,31,'de','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,32,'de','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,33,'es','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,34,'es','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,35,'it','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,36,'it','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,37,'no','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,38,'no','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,39,'pt','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,40,'pt','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,41,'ru','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,42,'ru','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,43,'sv','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,44,'sv','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,54,'el','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,55,'el','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,56,'uk','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,57,'uk','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES 
(1,58,'ca','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,59,'ca','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,60,'ja','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,61,'ja','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,62,'pl','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,63,'pl','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,64,'bg','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,65,'bg','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,66,'hr','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,67,'hr','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,68,'zh_CN','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,69,'zh_CN','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,70,'zh_TW','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,71,'zh_TW','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,72,'hu','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,73,'hu','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,74,'af','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,75,'af','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,76,'gl','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,77,'gl','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (19,78,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (20,78,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (21,78,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (22,78,'en','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,79,'ro','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,80,'ro','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,81,'rw','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,82,'rw','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,83,'ka','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,84,'ka','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,85,'lt','rt',100); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,86,'lt','rt',90); INSERT INTO collection_portalbox (id_collection,id_portalbox,ln,position,score) VALUES (1,87,'ar','rt',100); INSERT INTO collection_portalbox 
(id_collection,id_portalbox,ln,position,score) VALUES (1,88,'ar','rt',90); INSERT INTO example VALUES (1,'author search','author:"Ellis, J"'); INSERT INTO example VALUES (2,'word search','quantum'); INSERT INTO example VALUES (3,'wildcard word search','quant*'); INSERT INTO example VALUES (4,'phrase search','title:\'standard model\''); INSERT INTO example VALUES (5,'boolean search','quark -sigma +dense'); INSERT INTO example VALUES (6,'complex boolean search','author:draper title:electrical'); INSERT INTO example VALUES (7,'complex boolean search','author:ellis -muon* +abstract:\'dense quark matter\''); INSERT INTO example VALUES (8,'boolean search','ellis muon*'); INSERT INTO example VALUES (13,'reference search','references:"Theor. Math. Phys. 2 (1998) 231"'); INSERT INTO example VALUES (14,'phrase search','abstract:\'Higgs boson\''); INSERT INTO example VALUES (15,'wildcard word search','cal*'); INSERT INTO example VALUES (16,'keyword search','keyword:Nobel'); INSERT INTO example VALUES (17,'author search','author:Cole'); INSERT INTO example VALUES (18,'phrase search','title:\'nuclear electronics\''); INSERT INTO example VALUES (19,'combined search','supergravity and author:"Ellis, J" and year:1980->1990'); INSERT INTO fieldvalue VALUES (1,'Particle Physics','Particle Physics'); INSERT INTO fieldvalue VALUES (2,'Particle Physics - Experimental Results','Particle Physics - Experimental Results'); INSERT INTO fieldvalue VALUES (3,'Particle Physics - Phenomenology','Particle Physics - Phenomenology'); INSERT INTO fieldvalue VALUES (4,'Particle Physics - Theory','Particle Physics - Theory'); INSERT INTO fieldvalue VALUES (5,'Particle Physics - Lattice','Particle Physics - Lattice'); INSERT INTO fieldvalue VALUES (6,'Nuclear Physics','Nuclear Physics'); INSERT INTO fieldvalue VALUES (7,'General Relativity and Cosmology','General Relativity and Cosmology'); INSERT INTO fieldvalue VALUES (8,'General Theoretical Physics','General Theoretical Physics'); INSERT INTO fieldvalue VALUES (9,'Detectors and Experimental Techniques','Detectors and Experimental Techniques'); INSERT INTO fieldvalue VALUES (10,'Accelerators and Storage Rings','Accelerators and Storage Rings'); INSERT INTO fieldvalue VALUES (11,'Health Physics and Radiation Effects','Health Physics and Radiation Effects'); INSERT INTO fieldvalue VALUES (12,'Computing and Computers','Computing and Computers'); INSERT INTO fieldvalue VALUES (13,'Mathematical Physics and Mathematics','Mathematical Physics and Mathematics'); INSERT INTO fieldvalue VALUES (14,'Astrophysics and Astronomy','Astrophysics and Astronomy'); INSERT INTO fieldvalue VALUES (15,'Nonlinear Systems','Nonlinear Systems'); INSERT INTO fieldvalue VALUES (16,'Condensed Matter','Condensed Matter'); INSERT INTO fieldvalue VALUES (17,'Other Fields of Physics','Other Fields of Physics'); INSERT INTO fieldvalue VALUES (18,'Chemical Physics and Chemistry','Chemical Physics and Chemistry'); INSERT INTO fieldvalue VALUES (19,'Engineering','Engineering'); INSERT INTO fieldvalue VALUES (20,'Information Transfer and Management','Information Transfer and Management'); INSERT INTO fieldvalue VALUES (21,'Other Aspects of Science','Other Aspects of Science'); INSERT INTO fieldvalue VALUES (22,'Commerce, Economics, Social Science','Commerce, Economics, Social Science'); INSERT INTO fieldvalue VALUES (23,'Biography, Geography, History','Biography, Geography, History'); INSERT INTO fieldvalue VALUES (24,'Other Subjects','Other Subjects'); INSERT INTO fieldvalue VALUES (25,'CERN TH','TH'); INSERT 
INTO fieldvalue VALUES (26,'CERN PPE','PPE'); INSERT INTO fieldvalue VALUES (27,'Experiments and Tracks','Experiments and Tracks'); INSERT INTO fieldvalue VALUES (28,'Personalities and History of CERN','Personalities and History of CERN'); INSERT INTO fieldvalue VALUES (29,'Diagrams and Charts','Diagrams and Charts'); INSERT INTO fieldvalue VALUES (30,'Life at CERN','Life at CERN'); INSERT INTO fieldvalue VALUES (31,'CERN ETT','ETT'); INSERT INTO fieldvalue VALUES (32,'CERN EP','EP'); INSERT INTO oaiREPOSITORY VALUES (2,'CERN experimental papers','cern:experiment','','','c=;p1=CERN;f1=reportnumber;m1=a;p2=(EP|PPE);f2=division;m2=r;p3=;f3=;m3=;',NULL,'CERN','reportnumber','a','(EP|PPE)','division','r','','',''); INSERT INTO oaiREPOSITORY VALUES (3,'CERN theoretical papers','cern:theory','','','c=;p1=CERN;f1=reportnumber;m1=a;p2=TH;f2=division;m2=e;p3=;f3=;m3=;',NULL,'CERN','reportnumber','a','TH','division','e','','',''); INSERT INTO portalbox VALUES (1,'ABOUT THIS SITE','Welcome to the demo site of Invenio, free document server software from CERN. Please feel free to explore all the features of this demo site to the full.'); INSERT INTO portalbox VALUES (2,'SEE ALSO','Invenio
    CERN'); INSERT INTO portalbox VALUES (3,'ABOUT ARTICLES','The Articles collection contains all the papers published in scientific journals by our staff. The collection starts from 1998.'); INSERT INTO portalbox VALUES (4,'SEE ALSO','arXiv.org
    CDS
    ChemWeb
    MathSciNet'); INSERT INTO portalbox VALUES (5,'ABOUT PREPRINTS','The Preprints collection contains not-yet-published papers and research results obtained at the institute. The collection starts from 2001.'); INSERT INTO portalbox VALUES (6,'SEE ALSO','arXiv.org
    CDS'); INSERT INTO portalbox VALUES (7,'ABOUT BOOKS','The Books collection contains monographs published by institute staff as well as pointers to interesting online e-books available in fulltext.'); INSERT INTO portalbox VALUES (8,'SEE ALSO','UV e-Books
    Project Gutenberg'); INSERT INTO portalbox VALUES (9,'ABOUT THESES','The Theses collection contains all students\' theses defended at the institute. The collection starts from 1950.'); INSERT INTO portalbox VALUES (10,'SEE ALSO','NDLTD Theses
Thesis.DE'); INSERT INTO portalbox VALUES (11,'ABOUT REPORTS','The Reports collection contains miscellaneous technical reports, unpublished elsewhere. The collection starts from 1950.'); INSERT INTO portalbox VALUES (12,'TEST portal box','this is a test portal box'); INSERT INTO portalbox VALUES (13,'test','this is a test portal box'); INSERT INTO portalbox VALUES (14,'ABOUT PICTURES','The Pictures collection contains selected photographs and illustrations. Please note that photographs are copyrighted. The collection includes a historical archive that starts from 1950.'); INSERT INTO portalbox VALUES (15,'ABOUT CERN DIVISIONS','These virtual collections present a specific point of view on the database content from the CERN Divisions perspective.'); INSERT INTO portalbox VALUES (16,'ABOUT CERN EXPERIMENTS','These virtual collections present a specific point of view on the database content from the CERN Experiments perspective.'); INSERT INTO portalbox VALUES (17,'ABOUT TH','This virtual collection groups together all the documents written by authors from CERN TH Division.'); INSERT INTO portalbox VALUES (18,'ABOUT EP','This virtual collection groups together all the documents written by authors from CERN EP Division.'); INSERT INTO portalbox VALUES (19,'ABOUT ISOLDE','This virtual collection groups together all the documents about the ISOLDE CERN experiment.'); INSERT INTO portalbox VALUES (20,'ABOUT ALEPH','This virtual collection groups together all the documents about the ALEPH CERN experiment.'); INSERT INTO portalbox VALUES (21,'ABOUT ARTICLES AND PREPRINTS','This collection groups together all published and non-published articles, many of which are in electronic fulltext form.'); INSERT INTO portalbox VALUES (22,'ABOUT BOOKS AND REPORTS','This collection groups together all monograph-like publications, be they books, theses, reports, book chapters, proceedings, and so on.'); INSERT INTO portalbox VALUES (23,'ABOUT MULTIMEDIA & OUTREACH','This collection groups together all multimedia- and outreach-oriented material.'); INSERT INTO portalbox VALUES (24,'ABOUT POETRY','This collection presents poetry excerpts, mainly to demonstrate and test the treatment of various languages.

    Vitrum edere possum; mihi non nocet.
    Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα.
    Pòdi manjar de veire, me nafrariá pas.
    Ég get etið gler án þess að meiða mig.
    Ic mæg glæs eotan ond hit ne hearmiað me.
    ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬
    ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑
    Pot să mănânc sticlă și ea nu mă rănește.
    Meg tudom enni az üveget, nem lesz tőle bajom.
    Môžem jesť sklo. Nezraní ma.
    אני יכול לאכול זכוכית וזה לא מזיק לי.
    איך קען עסן גלאָז און עס טוט מיר נישט װײ.
    أنا قادر على أكل الزجاج و هذا لا يؤلمني.
    Я могу есть стекло, оно мне не вредит.
    მინას ვჭამ და არა მტკივა.
    Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։
    मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती.
    काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥
    ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ
    Tôi có thể ăn thủy tinh mà không hại gì.
    我能吞下玻璃而不伤身体。
    私はガラスを食べられます。それは私を傷つけません。
    나는 유리를 먹을 수 있어요. 그래도 아프지 않아요
    (http://www.columbia.edu/kermit/utf8.html)'); INSERT INTO portalbox VALUES (25,'À PROPOS DE CE SITE','Bienvenue sur le site de démonstration de Invenio, un logiciel libre pour des serveurs des documents, venant du CERN. Veuillez explorer les possibilités de ce site de démonstration de tous ses côtés.'); INSERT INTO portalbox VALUES (26,'VOIR AUSSI','Invenio
    CERN'); INSERT INTO portalbox VALUES (27,'O TÝCHTO STRÁNKACH','Vitajte na demonštračných stránkach Invenio, voľne dostupného softwaru pre dokumentové servery, pochádzajúceho z CERNu. Prehliadnite si možnosti našeho demonštračného serveru podla ľubovôle.'); INSERT INTO portalbox VALUES (28,'VIĎ TIEŽ','Invenio
    CERN'); INSERT INTO portalbox VALUES (29,'O TĚCHTO STRÁNKÁCH','Vítejte na demonstračních stránkách Invenio, volně dostupného softwaru pro dokumentové servery, pocházejícího z CERNu. Prohlédněte si možnosti našeho demonstračního serveru podle libosti.'); INSERT INTO portalbox VALUES (30,'VIZ TÉŽ','Invenio
    CERN'); INSERT INTO portalbox VALUES (31,'ÜBER DIESEN SEITEN','Willkommen Sie bei der Demo-Seite des Invenio, des Dokument Management Systems Software aus CERN. Hier können Sie den System gleich und frei ausprobieren.'); INSERT INTO portalbox VALUES (32,'SEHEN SIE AUCH','Invenio
    CERN'); INSERT INTO portalbox VALUES (33,'ACERCA DE ESTAS PÁGINAS','Bienvenidos a las páginas de demostración de Invenio, un software gratuito desarrollado por el CERN que permite crear un servidor de documentos. Le invitamos a explorar a fondo todas las funcionalidades ofrecidas por estas páginas de demostración.'); INSERT INTO portalbox VALUES (34,'VEA TAMBIÉN','Invenio
    CERN'); INSERT INTO portalbox VALUES (35,'A PROPOSITO DI QUESTO SITO','Benvenuti nel sito demo di Invenio, un software libero per server di documenti sviluppato al CERN. Vi invitiamo ad esplorare a fondo tutte le caratteristiche di questo sito demo.'); INSERT INTO portalbox VALUES (36,'VEDI ANCHE','Invenio
    CERN'); INSERT INTO portalbox VALUES (37,'OM DENNE SIDEN','Velkommen til demosiden for Invenio, en gratis dokumentserver fra CERN. Vennligst føl deg fri til å utforske alle mulighetene i denne demoen til det fulle.'); INSERT INTO portalbox VALUES (38,'SE OGSÅ','Invenio
    CERN'); INSERT INTO portalbox VALUES (39,'SOBRE ESTE SITE','Bem vindo ao site de demonstração do Invenio, um servidor de documentos livre desenvolvido pelo CERN. Sinta-se à vontade para explorar plenamente todos os recursos deste site demonstração.'); INSERT INTO portalbox VALUES (40,'VEJA TAMBÉM','Invenio
    CERN'); INSERT INTO portalbox VALUES (41,'ОБ ЭТОМ САЙТЕ','Добро пожаловать на наш демонстрационный сайт Invenio. Invenio -- свободная программа для серверов документов, разработанная в CERNе. Пожалуйста пользуйтесь свободно этим сайтом.'); INSERT INTO portalbox VALUES (42,'СМОТРИТЕ ТАКЖЕ','Invenio
    CERN'); INSERT INTO portalbox VALUES (43,'OM DENNA WEBBPLATS','Välkommen till demoinstallationen av Invenio, en fri programvara för hantering av dokument, från CERN. Välkommen att undersöka alla funktioner i denna installation.'); INSERT INTO portalbox VALUES (44,'SE ÄVEN','Invenio
    CERN'); INSERT INTO portalbox VALUES (45,'SUBMIT PREPRINT','Submit a new preprint'); INSERT INTO portalbox VALUES (46,'SUBMIT BOOK','Submit a new book'); INSERT INTO portalbox VALUES (47,'SUBMIT THESIS','Submit a new thesis'); INSERT INTO portalbox VALUES (48,'SUBMIT REPORT','Submit a new report'); INSERT INTO portalbox VALUES (49,'SUBMIT ARTICLE','Submit a new article'); INSERT INTO portalbox VALUES (50,'SUBMIT PICTURE','Submit a new picture'); INSERT INTO portalbox VALUES (51,'SUBMIT NEW DOCUMENT','Submit a new article
    Submit a new preprint'); INSERT INTO portalbox VALUES (52,'SUBMIT NEW DOCUMENT','Submit a new book
    Submit a new thesis
    Submit a new report'); INSERT INTO portalbox VALUES (53,'SUBMIT NEW DOCUMENT','Submit a new picture'); INSERT INTO portalbox VALUES (54,'ΣΧΕΤΙΚΑ ΜΕ ΤΗΝ ΣΕΛΙΔΑ','Καλως ήλθατε στον δικτυακό τόπο του Invenio, ενός δωρεάν εξυπηρετητή για έγγραφα προερχόμενο απο το CERN. Είστε ευπρόσδεκτοι να εξερευνήσετε σε βάθος τις δυνατότητες που σας παρέχει ο δικτυακός αυτός τόπος.'); INSERT INTO portalbox VALUES (55,'ΔΕΙΤΕ ΕΠΙΣΗΣ','Invenio
    CERN'); INSERT INTO portalbox VALUES (56,'ПРО ЦЕЙ САЙТ','Ласкаво просимо до демонстраційного сайту Invenio, вільного програмного забезпечення, розробленого CERN. Випробуйте всі можливості цього демонстраційного сайту в повному обсязі.'); INSERT INTO portalbox VALUES (57,'ДИВИСЬ ТАКОЖ','Invenio
    CERN'); INSERT INTO portalbox VALUES (58,'SOBRE AQUEST LLOC','Benvinguts al lloc de demo de Invenio, un servidor de documents lliure originat al CERN. Us convidem a explorar a fons totes les funcionalitats ofertes en aquestes pàgines de demostració.'); INSERT INTO portalbox VALUES (59,'VEGEU TAMBÉ','Invenio
    CERN'); INSERT INTO portalbox VALUES (60,'この場所について','Invenioデモンストレーションの場所への歓迎, CERN から来る自由な文書のサーバーソフトウェア, このデモンストレーションの場所の特徴すべてを探検する自由の感じ'); INSERT INTO portalbox VALUES (61,'また見なさい','Invenio
    CERN'); INSERT INTO portalbox VALUES (62,'O TEJ STRONIE','Witamy w wersji demo systemu Invenio, darmowego oprogramowania do obsługi serwera dokumentów, stworzonego w CERN. Zachęcamy do odkrywania wszelkich funkcjonalności oferowanych przez tę stronę.'); INSERT INTO portalbox VALUES (63,'ZOBACZ TAKŻE','Invenio
    CERN'); INSERT INTO portalbox VALUES (64,'ЗА САЙТА','Добре дошли на демонстрационния сайт на Invenio, свободен софтуер за документни сървъри изработен в ЦЕРН. Чувствайте се свободни да изследвате всяка една от характеристиките на сайта.'); INSERT INTO portalbox VALUES (65,'ВИЖ СЪЩО','Invenio
    CERN'); INSERT INTO portalbox VALUES (66,'O OVOM SITE-u','Dobrodošli na Invenio demo site. Invenio je slobodno dostupan poslužitelj dokumenata razvijen na CERN-u. Slobodno istražite sve mogućnosti ove aplikacije.'); INSERT INTO portalbox VALUES (67,'TAKOĐER POGLEDAJTE','Invenio
    CERN'); INSERT INTO portalbox VALUES (68,'关于这个网站','欢迎来到Invenio 的示范网站!Invenio是一个由CERN开发的免费文件服务器软件。 要了解这网站所提供的各项特点, 请立刻行动,尽情探索。'); INSERT INTO portalbox VALUES (69,'参见','Invenio
    CERN'); INSERT INTO portalbox VALUES (70,'關於這個網站', '歡迎來到Invenio 的示範網站!Invenio是一個由CERN開發的免費文件伺服器軟體。 要瞭解這網站所提供的各項特點, 請立刻行動,盡情探索。'); INSERT INTO portalbox VALUES (71,'參見','Invenio
    CERN'); INSERT INTO portalbox VALUES (72,'IMPRESSZUM', 'Üdvözöljük a Invenio bemutatóoldalain! Ezt a szabad dokumentumkezelő szoftvert a CERN-ben fejlesztették. Fedezze fel bátran a tesztrendszer nyújtotta szolgáltatásokat!'); INSERT INTO portalbox VALUES (73,'LÁSD MÉG','Invenio
    CERN'); INSERT INTO portalbox VALUES (74,'OMTRENT HIERDIE TUISTE', 'Welkom by die demo tuiste van Invenio, gratis dokument bediener sagteware wat deur CERN geskryf is. Voel vry om al die eienskappe van die demo te deursoek.'); INSERT INTO portalbox VALUES (75,'SIEN OOK','Invenio
    CERN'); INSERT INTO portalbox VALUES (76,'ACERCA DESTE SITIO', 'Benvido ó sitio de demostración do Invenio, un software de servidor de documentos do CERN. Por favor síntete libre de explorar todas as características deste sitio de demostración.'); INSERT INTO portalbox VALUES (77,'VEXA TAMÉN','Invenio
CERN'); INSERT INTO portalbox VALUES (78,'ABOUT ATLANTIS TIMES','The \"Atlantis Times\" collections contain the articles from the \"Atlantis Times\" journal.'); INSERT INTO portalbox VALUES (79,'DESPRE ACEST SITE', 'Bine aţi venit pe site-ul demo al Invenio, un software gratuit pentru servere de documente, creat de CERN. Nu ezitaţi să exploraţi din plin toate caracteristicile acestui site demo.'); INSERT INTO portalbox VALUES (80,'ALTE RESURSE','Invenio
    CERN'); INSERT INTO portalbox VALUES (81,'IBYEREKERANYE N\'IYI WEB', 'Murakzaneza kuri web ya Invenio, iyi ni koranabuhanga y\'ubuntu ya kozwe na CERN. Bitimuntu afite uburenganzira bwo kuyigerageza no kuyikoresha.'); INSERT INTO portalbox VALUES (82,'REBA N\'IBI','Invenio
    CERN'); -- ' INSERT INTO portalbox VALUES (83,'საიტის შესახებ', 'კეთილი იყოს თქვენი მობრძანება Invenio -ის სადემონსტრაციო საიტზე, თავისუფალი დოკუმენტების სერვერი CERN -ისაგან. გთხოვთ სრულად შეისწავლოთ სადემონსტრაციო საიტის შესაძლებლობები.'); INSERT INTO portalbox VALUES (84,'ასევე იხილეთ','Invenio
    CERN'); -- ' INSERT INTO portalbox VALUES (85,'APIE PUSLAPĮ', 'Sveiki atvykę į Invenio bandomąjį tinklapį. Invenio yra nemokama programinė įranga dokumentų serveriams, sukurta CERN. Kviečiame išbandyti visas tinklapio galimybes ir funkcijas.'); INSERT INTO portalbox VALUES (86,'TAIP PAT ŽIŪRĖKITE','Invenio
    CERN'); -- ' INSERT INTO portalbox VALUES (87,'حول هذا الموقع','مرحبا بكم في الموقع التجريبي لإنفينيو، المحطة الخادمة (الحرة) المبرمجة من طرف المنظمة الأوربية للبحوث النووية. الرجاء عدم التردد للإطلاع على جميع صفحات هذا الموقع التجريبي'); INSERT INTO portalbox VALUES (88,'زوروا أيضا','Invenio
    CERN'); INSERT INTO sbmCOLLECTION VALUES (36,'Document Types'); INSERT INTO sbmCOLLECTION_sbmCOLLECTION VALUES (0,36,1); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOTHE',1); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOPOE',2); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOPIC',3); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOART',4); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOBOO',5); INSERT INTO sbmCOLLECTION_sbmDOCTYPE VALUES (36,'DEMOJRN',6); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOPIC','LIFE','Life at CERN',3); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOPIC','HIST','Personalities and History of CERN',2); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOPIC','EXP','Experiments',1); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOART','ARTICLE','Article',1); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOART','PREPRINT','Preprint',2); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOART','REPORT','Report',3); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOJRN','NEWS','News',2); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOJRN','ARTS','Arts',1); INSERT INTO sbmCATEGORIES (doctype,sname,lname,score) VALUES ('DEMOJRN','SCIENCE','Science',4); INSERT INTO sbmDOCTYPE VALUES ('Demo Picture Submission','DEMOPIC','2007-09-13','2007-10-17','

    \r\nThe Demo Picture submission demonstrates a slightly more detailed submission type.
    \r\nIt makes use of different categories (which in this case are used in the picture\'s reference number to better describe it) and creates icons for the submitted picture files. Records created with this submission are inserted into the ATLANTIS \"Pictures\" collection.\r\n

    \r\n'); INSERT INTO sbmDOCTYPE VALUES ('Demo Thesis Submission','DEMOTHE','2008-03-02','2008-03-05','
    \r\n
    \r\nThe Demo Thesis submission demonstrates a very simple submission type.
    \r\nIt has no categories, submits directly into the ATLANTIS \"Theses\" collection and also stamps full-text files.\r\n

    \r\n'); INSERT INTO sbmDOCTYPE VALUES ('Demo Article Submission','DEMOART','2008-03-06','2008-03-06','

    The Demo Article submission demonstrates a more complex submission type.

    \r\nThe submission gives a document a category. This category is used in the document\'s reference number and also serves as a means to classify it into a specific ATLANTIS collection. Documents submitted into the \"Article\" category are inserted into the ATLANTIS \"Articles\" collection, documents categorized as \"Preprint\" are inserted into the ATLANTIS \"Preprints\" collection, and a document categorized as a \"Report\" is inserted into the ATLANTIS \"Reports\" collection.

    \r\n'); INSERT INTO sbmDOCTYPE VALUES ('Demo Book Submission (Refereed)','DEMOBOO','2008-03-06','2008-03-06','

    The Demo Book submission demonstrates a refereed submission.

    \r\nWhen the details of a book are submitted by a user, they must be approved by a referee before the record is integrated into the ATLANTIS repository.
    \r\nApproved books are integrated into the ATLANTIS \"Books\" collection.
    \r\n'); INSERT INTO sbmDOCTYPE VALUES ('Demo Poetry Submission','DEMOPOE','2008-03-12','2008-03-12','

    \r\nThe Demo Poetry submission demonstrates a simple submission type with a submission form split over two pages.
    \r\nIt does not use categories. Records created with this submission are inserted into the ATLANTIS \"Poetry\" collection.\r\n

    '); INSERT INTO sbmDOCTYPE VALUES ('Demo Journal Submission','DEMOJRN','2008-09-18','2008-09-18','The Demo Journal submission submits records that will be integrated into the demo "Atlantis Times" journal.
    \r\n It makes use of FCKeditor to provide WYSIWYG HTML editing of the articles. Install it with make install-fckeditor-plugin.'); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,1,'DEMOPIC_TITLE','

    Submit an ATLANTIS picture:

    *Picture Title:
    ','M','Picture Title','','2007-09-13','2007-10-04',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,2,'DEMOPIC_PHOTOG','

    Picture Author(s) or Photographer(s): (one per line)
    ','O','Photographer(s)','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,3,'DEMOPIC_DATE','

    *Picture Date: (dd/mm/yyyy) ','M','Picture Date','DatCheckNew','2007-09-13','2007-10-04',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,4,'DEMOPIC_KW','

    Keywords:
    (one keyword/key-phrase per line)
    ','O','Picture Keywords','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,5,'DEMOPIC_DESCR','

    Picture Description:
    ','O','Picture Description','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,6,'DEMOPIC_ADD_RN','

    Your picture will be given a reference number automatically.
    However, if the picture has other reference numbers, please enter them here:
    (one per line)
    ','O','Additional Reference Numbers','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,7,'DEMOPIC_NOTE','

    Additional Comments or Notes about the Picture:
    ','O','Picture Notes or Comments','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,8,'Upload_Photos','

    Select the photo(s) to upload:
    ','O','Picture File','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPIC',1,9,'DEMOPIC_FINISH','

    ','O','','','2007-09-13','2007-09-13',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPIC',1,1,'DEMOPIC_RN','

    Modify a picture\'s bibliographic information:

    *Picture Reference Number:  ','M','Reference Number','','2007-10-04','2007-10-04',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPIC',1,2,'DEMOPIC_CHANGE','

    *Choose the fields to be modified:
    ','M','Fields to Modify','','2007-10-04','2007-10-04',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPIC',1,3,'DEMOPIC_CONT','

    ','O','','','2007-10-04','2007-10-04',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SRVDEMOPIC',1,1,'DEMOPIC_RN','

    Revise/add pictures:

    *Picture Reference Number:  ','M','Reference Number','','2009-04-09','2009-04-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SRVDEMOPIC',1,2,'DEMOPIC_CONT','

    ','O','','','2009-04-09','2009-04-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,1,'DEMOTHE_REP','

    Submit an ATLANTIS Thesis:

    Your thesis will be given a reference number automatically.
    However, if it has other reference numbers, please enter them here:
    (one per line)
    ','O','Other Report Numbers','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,2,'DEMOTHE_TITLE','

    *Thesis Title:
    ','M','Title','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,3,'DEMOTHE_SUBTTL','

    Thesis Subtitle (if any):
    ','O','Subtitle','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,4,'DEMOTHE_AU','

    *Author of the Thesis: (one per line)
    ','M','Author(s)','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,5,'DEMOTHE_SUPERV','

    Thesis Supervisor(s): (one per line)
    ','O','Thesis Supervisor(s)','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,6,'DEMOTHE_ABS','

    *Abstract:
    ','M','Abstract','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,7,'DEMOTHE_NUMP','

    Number of Pages: ','O','Number of Pages','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,8,'DEMOTHE_LANG','

    *Language: ','M','Thesis Language','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,9,'DEMOTHE_PUBL','

    *Thesis Publisher (or Institute): ','M','Thesis Publisher/University','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,10,'DEMOTHE_PLDEF',' at *Place/Town: ','M','Place of Thesis Defence','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,11,'DEMOTHE_DIPL','

    *Diploma Awarded: ','M','Diploma Awarded','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,12,'DEMOTHE_DATE','

    *Thesis Defence date (dd/mm/yyyy): ','M','Date of Thesis Defence','DatCheckNew','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,13,'DEMOTHE_UNIV','
    *Awarding University: ','M','Awarding University','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,14,'DEMOTHE_PLACE',' at *Place/Town: ','M','Awarding University town','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,15,'DEMOTHE_FILE','

    *Enter the full path to the source file to upload:
    ','M','Source File','','2008-03-02','2008-03-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOTHE',1,16,'DEMOTHE_END','


    ','O','','','2008-03-02','2008-03-02',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOTHE',1,1,'DEMOTHE_RN','

    Modify a thesis\' bibliographic information:

    *Thesis Reference Number:  ','M','Reference Number','','2008-03-05','2008-03-05',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOTHE',1,2,'DEMOTHE_CHANGE','

    *Choose the fields to be modified:
    ','M','Fields to Modify','','2008-03-05','2008-03-05',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOTHE',1,3,'DEMOTHE_CONT','

    ','O','','','2008-03-05','2008-03-05',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,1,'DEMOART_REP','

    Submit an ATLANTIS Article:

    Your document will be given a reference number automatically.
    However, if it has other reference numbers, please enter them here:
    (one per line)
    ','O','Other Report Numbers','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,2,'DEMOART_TITLE','

    *Document Title:
    ','M','Title','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,3,'DEMOART_AU','

    *Author of the Document: (one per line)
    ','M','Author(s)','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,4,'DEMOART_ABS','

    *Abstract:
    ','M','Abstract','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,5,'DEMOART_NUMP','

    Number of Pages: ','O','Number of Pages','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,6,'DEMOART_LANG','

    *Language: ','O','Language','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,7,'DEMOART_DATE','

    *Date of Document: (dd/mm/yyyy) ','M','Date of Document','DatCheckNew','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,8,'DEMOART_KW','

    Keywords/Key-phrases: (one per line)
    ','O','Keywords/Key-phrases','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,9,'DEMOART_NOTE','

    Additional Notes or Comments:
    ','O','Notes/Comments','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,10,'DEMOART_FILE','

    *Enter the full path to the source file to upload:
    ','M','Source File','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOART',1,11,'DEMOART_END','


    ','O','','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOART',1,1,'DEMOART_RN','

    Modify an article\'s bibliographic information:

    *Document Reference Number:  ','M','Reference Number','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOART',1,2,'DEMOART_CHANGE','

    *Choose the fields to be modified:
    ','M','Fields to Modify','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOART',1,3,'DEMOART_CONT','

    ','O','','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,1,'DEMOBOO_REP','

    Submit an ATLANTIS Book:

    Your book will be given a reference number automatically.
    However, if it has other reference numbers, please enter them here:
    (one per line)
    ','O','Other Report Numbers','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,2,'DEMOBOO_TITLE','

    *Book Title:
    ','M','Title','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,3,'DEMOBOO_AU','

    *Author of the Book: (one per line)
    ','M','Author(s)','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,4,'DEMOBOO_ABS','

    *Abstract:
    ','M','Abstract','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,5,'DEMOBOO_NUMP','

    Number of Pages: ','O','Number of Pages','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,6,'DEMOBOO_LANG','

    *Language: ','O','Language','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,7,'DEMOBOO_DATE','

    *Date of the Book: (dd/mm/yyyy) ','M','Date of Document','DatCheckNew','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,8,'DEMOBOO_KW','

    Keywords/Key-phrases: (one per line)
    ','O','Keywords/Key-phrases','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,9,'DEMOBOO_NOTE','

    Additional Notes or Comments:
    ','O','Notes/Comments','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,10,'DEMOBOO_FILE','

    Enter the full path to the source file to upload:
    ','O','Source File','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOBOO',1,11,'DEMOBOO_END','


    ','O','','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOBOO',1,1,'DEMOBOO_RN','

    Modify a book\'s bibliographic information:

    *Book Reference Number:  ','M','Reference Number','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOBOO',1,2,'DEMOBOO_CHANGE','

    *Choose the fields to be modified:
    ','M','Fields to Modify','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOBOO',1,3,'DEMOBOO_CONT','

    ','O','','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('APPDEMOBOO',1,1,'DEMOBOO_RN','

    Approve or reject an ATLANTIS book:

    *Book Reference Number:  ','M','Reference Number','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('APPDEMOBOO',1,2,'DEMOBOO_DECSN','

    *Decision:
    \r\n','M','Decision','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('APPDEMOBOO',1,3,'DEMOBOO_COMNT','

    Comments on Decision:
    \r\n','O','Referee\'s Comments','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('APPDEMOBOO',1,4,'DEMOBOO_REGB','

    ','O','','','2008-03-07','2008-03-07',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',1,1,'DEMOPOE_TITLE','

    Submit an ATLANTIS Poem:

    *Poem Title:
    ','M','Title','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',1,2,'DEMOPOE_AU','

    *Author(s) of the Poem: (one per line)
    ','M','Author(s)','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',1,3,'DEMOPOE_LANG','

    *Poem Language: ','M','Language','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',1,4,'DEMOPOE_YEAR','*Year of the Poem: ','M','Poem Year','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',1,5,'DEMOPOE_DUMMY','','O','','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',2,1,'DEMOPOE_ABS','


    *Poem Text:
    ','M','Abstract','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOPOE',2,2,'DEMOPOE_END','


    ','O','','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPOE',1,1,'DEMOPOE_RN','

    Modify a poem\'s bibliographic information:

    *Poem Reference Number:  ','M','Reference Number','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPOE',1,2,'DEMOPOE_CHANGE','

    *Choose the fields to be modified:
    ','M','Fields to Modify','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOPOE',1,3,'DEMOPOE_CONT','

    ','O','','','2008-03-12','2008-03-12',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,13,'DEMOJRN_ENDING','
    ','O','','','2009-02-20','2009-02-20',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOJRN',1,3,'DEMOJRN_CONT','

    ','O','','','2008-10-06','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOJRN',1,2,'DEMOJRN_CHANGE','','O','','','2009-01-09','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('MBIDEMOJRN',1,1,'DEMOJRN_RN','

    Update a journal article:

    *Document Reference Number:  ','M','','','2008-10-06','2008-10-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,3,'DEMOJRN_ISSUES','
    *Order(s) (digit) and issue(s) (xx/YYYY) of the article:
    ','O','Order and issue numbers','','2009-02-20','2009-02-20',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,2,'DEMOJRN_TYPE','

    Submit an Atlantis Times article:
    *Status:
    ','O','Status:','','2009-02-20','2009-02-20',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,5,'DEMOJRN_EMAIL','


    E-mail(s) of the author(s): (one per line)
    ','O','E-mail of the author(s): (one per line)','','2008-09-26','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,9,'DEMOJRN_ABSF','

    French article:
    ','O','French article:','','2008-09-26','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,7,'DEMOJRN_TITLEF','


    French title:
    ','O','French title:','','2008-09-26','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,4,'DEMOJRN_AU','


    Author(s): (one per line)
    ','O','Author(s)','','2008-09-26','2009-02-20',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,6,'DEMOJRN_TITLEE','


    English title:
    ','O','English title:','','2008-09-26','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,10,'DEMOJRN_IN','','O','Journal Name','','2008-09-26','2009-02-06',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,12,'DEMOJRN_END','
    ','O','','','2008-09-26','2009-02-20',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,8,'DEMOJRN_ABSE','

    English article:
    ','O','English article:','','2008-11-04','2009-01-09',NULL,NULL); INSERT INTO sbmFIELD VALUES ('SBIDEMOJRN',1,14,'DEMOJRN_CATEG','','O','comboDEMOJRN-like for MBI','','2009-10-15','2009-10-15',NULL,NULL); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_TITLE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2007-09-13','2007-09-13',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_PHOTOG',NULL,'100__a','T',NULL,6,30,NULL,NULL,NULL,'2007-09-13','2007-09-19','

    Picture Author(s) or Photographer(s)
    (optional)(one per line):
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_DATE',NULL,'260__c','I',10,NULL,NULL,NULL,NULL,NULL,'2007-09-13','2007-09-19','

    Date of the picture (dd/mm/yyyy): ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_KW',NULL,'6531_a','T',NULL,2,50,NULL,NULL,NULL,'2007-09-13','2007-09-13','

    Keywords
    (Optional, one keyword/key-phrase per line):
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_DESCR',NULL,'520__a','T',NULL,12,80,NULL,NULL,NULL,'2007-09-13','2007-09-13','

    Picture Description:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_ADD_RN',NULL,'088__a','T',NULL,4,30,NULL,NULL,NULL,'2007-09-13','2007-09-13','

    Additional Reference Numbers:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_NOTE',NULL,'500__a','T',NULL,6,60,NULL,NULL,NULL,'2007-09-13','2007-09-13','

    Additional Comments or Notes about the Picture:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_FILE',NULL,'','F',40,NULL,NULL,NULL,NULL,NULL,'2007-09-13','2007-09-13',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_FINISH',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2007-09-13','2007-09-13',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2007-10-04','2007-10-04',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_RN',NULL,'037__a','I',30,NULL,NULL,NULL,'DEMO-PICTURE---???',NULL,'2007-10-04','2007-10-04',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPIC_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2007-10-04','2007-10-04',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_REP',NULL,'088__a','T',NULL,4,30,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Other Report Numbers (one per line):',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_TITLE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Title:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_SUBTTL',NULL,'245__b','T',NULL,3,60,NULL,NULL,NULL,'2008-03-02','2008-03-02','

    Thesis Subtitle (if any):
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_AU',NULL,'100__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Authors:
    (one per line):
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_SUPERV',NULL,'','T',NULL,6,60,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Thesis Supervisor(s)
    (one per line):
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_ABS',NULL,'520__a','T',NULL,12,80,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Abstract:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_NUMP',NULL,'300__a','I',5,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-06','
    Number of Pages: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_LANG',NULL,'041__a','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-02','2008-03-02','

    Select the Language: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_PUBL',NULL,'','I',35,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Thesis Publisher (or University): ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_PLDEF',NULL,'','I',20,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02','

    Place of Thesis Defence:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_DIPL',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-02','2008-03-02','

    Diploma Awarded:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_DATE',NULL,'269__c','I',10,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Date: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_UNIV',NULL,'502__b','I',30,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02','
    Awarding University:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_PLACE',NULL,'','I',20,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_FILE',NULL,'','F',60,NULL,NULL,NULL,NULL,NULL,'2008-03-02','2008-03-02',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_END',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-02','2008-03-02',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_RN',NULL,'037__a','I',30,NULL,NULL,NULL,'DEMO-THESIS--???',NULL,'2008-03-05','2008-03-05',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-05','2008-03-06',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOTHE_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-05','2008-03-05',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_ABS',NULL,'520__a','T',NULL,12,80,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Abstract:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_AU',NULL,'100__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Authors: (one per line)
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_DATE',NULL,'269__c','I',10,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Date: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_END',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_FILE',NULL,'','F',60,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_KW',NULL,'6531_a','T',NULL,4,50,NULL,NULL,NULL,'2008-03-07','2008-03-07','

    Keywords:
    (one keyword/key-phrase per line)
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_LANG',NULL,'041__a','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-07','2008-03-07','

    Select the Language: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_NOTE',NULL,'500__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','

    Additional Comments or Notes:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_NUMP',NULL,'300__a','I',5,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Number of Pages: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_REP',NULL,'088__a','T',NULL,4,30,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Other Report Numbers (one per line):',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_RN',NULL,'037__a','I',35,NULL,NULL,NULL,'DEMO---???',NULL,'2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOART_TITLE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Title:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_ABS',NULL,'520__a','T',NULL,12,80,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Abstract:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_AU',NULL,'100__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Authors: (one per line)
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_DATE',NULL,'269__c','I',10,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Date: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_END',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_FILE',NULL,'','F',60,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_KW',NULL,'6531_a','T',NULL,4,50,NULL,NULL,NULL,'2008-03-07','2008-03-07','

    Keywords:
    (one keyword/key-phrase per line)
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_LANG',NULL,'041__a','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-07','2008-03-07','

    Select the Language: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_NOTE',NULL,'500__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','

    Additional Comments or Notes:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_NUMP',NULL,'300__a','I',5,NULL,NULL,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Number of Pages: ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_REP',NULL,'088__a','T',NULL,4,30,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Other Report Numbers (one per line):',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_RN',NULL,'037__a','I',35,NULL,NULL,NULL,'DEMO-BOOK--???',NULL,'2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_TITLE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2008-03-07','2008-03-07','
    Title:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_COMNT',NULL,'','T',NULL,6,60,NULL,NULL,NULL,'2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_DECSN',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'\r\n','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOBOO_REGB',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-03-07','2008-03-07',NULL,NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_ABS',NULL,'520__a','T',NULL,20,80,NULL,NULL,NULL,'2008-03-12','2008-03-12','
    Abstract:
    ',NULL,0); INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_AU',NULL,'100__a','T',NULL,6,60,NULL,NULL,NULL,'2008-03-12','2008-03-12','
    Authors: (one per line)
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'\r\n','2008-03-12','2008-03-12',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
','2008-03-12','2008-03-12',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_DUMMY',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'


','2008-03-12','2008-03-12',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_END',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'

    \r\n\r\n
','2008-03-12','2008-03-12',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_LANG',NULL,'041__a','S',NULL,NULL,NULL,NULL,NULL,'','2008-03-12','2008-03-12','

Select the Language: ',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_RN',NULL,'037__a','I',35,NULL,NULL,NULL,'DEMO-POETRY--???',NULL,'2008-03-12','2008-03-12',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_TITLE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2008-03-12','2008-03-12','
    Title:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOPOE_YEAR',NULL,'909C0y','I',4,NULL,NULL,4,NULL,NULL,'2008-03-12','2008-03-12','

Year: ',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_CHANGE',NULL,'','S',NULL,NULL,NULL,NULL,NULL,'','2009-01-09','2009-02-20',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_TYPE',NULL,'691__a','S',NULL,NULL,NULL,NULL,NULL,'[?]','2008-12-04','2009-02-20','

    Update an Atlantis Times article:
    *Status:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_AU',NULL,'100__a','T',NULL,4,60,NULL,NULL,NULL,'2008-09-23','2009-02-20','


    Author(s): (one per line)
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_EMAIL',NULL,'859__a','T',NULL,4,60,NULL,NULL,NULL,'2008-09-23','2009-02-20','


    E-mail(s) of the author(s): (one per line)
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_TITLEE',NULL,'245__a','T',NULL,5,60,NULL,NULL,NULL,'2008-09-23','2009-02-20','


    English title:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_TITLEF',NULL,'246_1a','T',NULL,5,60,NULL,NULL,NULL,'2008-09-23','2009-02-20','


    French title:
',NULL,0);
-INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ABSF',NULL,'590__b','R',NULL,100,90,NULL,NULL,'from invenio.htmlutils import get_html_text_editor\r\nfrom invenio.config import CFG_SITE_URL\r\nfrom invenio.search_engine import get_fieldvalues\r\nimport os\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ABSF\" % curdir):\r\n try:\r\n content = get_fieldvalues(int(sysno), \'590__b\')[0]\r\n except:\r\n content = \'\'\r\nelif os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n content = file(\"%s/DEMOJRN_ABSE\" % curdir).read()\r\nelse:\r\n content = \'\'\r\n\r\ntext = get_html_text_editor(\"DEMOJRN_ABSF\", id=\"BulletinFCKEditor1\", content=content, toolbar_set=\"WebJournal\", width=\'522px\', height=\'700px\', file_upload_url=CFG_SITE_URL + \'/submit/attachfile\', custom_configurations_path=\'/fckeditor/journal-editor-config.js\')','2008-09-23','2009-02-23','

    French Article:
',NULL,0);
+INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ABSF',NULL,'590__b','R',NULL,100,90,NULL,NULL,'from invenio.htmlutils import get_html_text_editor\r\nfrom invenio.config import CFG_SITE_URL\r\nfrom invenio.search_engine_utils import get_fieldvalues\r\nimport os\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ABSF\" % curdir):\r\n try:\r\n content = get_fieldvalues(int(sysno), \'590__b\')[0]\r\n except:\r\n content = \'\'\r\nelif os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n content = file(\"%s/DEMOJRN_ABSE\" % curdir).read()\r\nelse:\r\n content = \'\'\r\n\r\ntext = get_html_text_editor(\"DEMOJRN_ABSF\", id=\"BulletinFCKEditor1\", content=content, toolbar_set=\"WebJournal\", width=\'522px\', height=\'700px\', file_upload_url=CFG_SITE_URL + \'/submit/attachfile\', custom_configurations_path=\'/fckeditor/journal-editor-config.js\')','2008-09-23','2009-02-23','

    French Article:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_CONT',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
','2008-10-06','2008-10-06',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_END',NULL,'','D',NULL,NULL,NULL,NULL,NULL,'
    \r\n\r\n
    ','2008-09-23','2009-02-20','
',NULL,0);
-INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ISSUES',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'from invenio.search_engine import get_fieldvalues\r\nfrom invenio.webjournal_utils import get_next_journal_issues, get_current_issue, get_journal_issue_grouping\r\nimport os\r\n\r\norders_and_issues = [(\'\',\'\')]*4\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ISSUE1\" % curdir):\r\n try:\r\n orders = get_fieldvalues(int(sysno), \'773__c\')\r\n issues = get_fieldvalues(int(sysno), \'773__n\')\r\n orders_and_issues = zip(orders, issues) + orders_and_issues\r\n except:\r\n pass\r\nelif (\'running\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ISSUE1\" % curdir):\r\n try:\r\n journal_name = \'AtlantisTimes\'\r\n current_issue = get_current_issue(\'en\', journal_name)\r\n nb_issues = get_journal_issue_grouping(journal_name)\r\n next_issue_numbers = get_next_journal_issues(current_issue, journal_name, nb_issues)\r\n orders_and_issues = zip([\'\']*4, next_issue_numbers) + orders_and_issues\r\n except:\r\n pass\r\nissues_fields = []\r\nsingle_issue_and_order_tmpl = \'\'\'\r\n\r\n\'\'\'\r\ni = 1\r\nfor order_and_issue in orders_and_issues[:4]:\r\n order = order_and_issue[0]\r\n issue = order_and_issue[1]\r\n issues_fields.append(single_issue_and_order_tmpl % (i, order, i, issue))\r\n i += 1\r\n\r\ntext = \'
    \'.join(issues_fields)\r\n','2009-02-20','2009-02-23','
    *Order(s) (digit) and issue(s) (xx/YYYY) of the article:
',NULL,0);
+INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ISSUES',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'from invenio.search_engine_utils import get_fieldvalues\r\nfrom invenio.webjournal_utils import get_next_journal_issues, get_current_issue, get_journal_issue_grouping\r\nimport os\r\n\r\norders_and_issues = [(\'\',\'\')]*4\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ISSUE1\" % curdir):\r\n try:\r\n orders = get_fieldvalues(int(sysno), \'773__c\')\r\n issues = get_fieldvalues(int(sysno), \'773__n\')\r\n orders_and_issues = zip(orders, issues) + orders_and_issues\r\n except:\r\n pass\r\nelif (\'running\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ISSUE1\" % curdir):\r\n try:\r\n journal_name = \'AtlantisTimes\'\r\n current_issue = get_current_issue(\'en\', journal_name)\r\n nb_issues = get_journal_issue_grouping(journal_name)\r\n next_issue_numbers = get_next_journal_issues(current_issue, journal_name, nb_issues)\r\n orders_and_issues = zip([\'\']*4, next_issue_numbers) + orders_and_issues\r\n except:\r\n pass\r\nissues_fields = []\r\nsingle_issue_and_order_tmpl = \'\'\'\r\n\r\n\'\'\'\r\ni = 1\r\nfor order_and_issue in orders_and_issues[:4]:\r\n order = order_and_issue[0]\r\n issue = order_and_issue[1]\r\n issues_fields.append(single_issue_and_order_tmpl % (i, order, i, issue))\r\n i += 1\r\n\r\ntext = \'
    \'.join(issues_fields)\r\n','2009-02-20','2009-02-23','
    *Order(s) (digit) and issue(s) (xx/YYYY) of the article:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_RN',NULL,'037__a','I',35,NULL,NULL,NULL,'BUL---???',NULL,'2008-10-06','2009-02-20',NULL,NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_IN',NULL,'595__a','H',NULL,NULL,NULL,NULL,'Atlantis Times',NULL,'2008-09-23','2009-02-20',' ',NULL,0);
-INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ABSE',NULL,'520__b','R',NULL,100,90,NULL,NULL,'from invenio.htmlutils import get_html_text_editor\r\nfrom invenio.config import CFG_SITE_URL\r\nfrom invenio.search_engine import get_fieldvalues\r\nimport os\r\n\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n try:\r\n content = get_fieldvalues(int(sysno), \'520__b\')[0]\r\n except:\r\n content = \'\'\r\nelif os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n content = file(\"%s/DEMOJRN_ABSE\" % curdir).read()\r\nelse:\r\n content = \'\'\r\n\r\ntext = get_html_text_editor(\"DEMOJRN_ABSE\",id=\"BulletinFCKEditor2\", content=content, toolbar_set=\"WebJournal\", width=\'522px\', height=\'700px\', file_upload_url=CFG_SITE_URL + \'/submit/attachfile\', custom_configurations_path=\'/fckeditor/journal-editor-config.js\')\r\n\r\n','2008-09-22','2009-02-23','

    English Article:
',NULL,0);
+INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ABSE',NULL,'520__b','R',NULL,100,90,NULL,NULL,'from invenio.htmlutils import get_html_text_editor\r\nfrom invenio.config import CFG_SITE_URL\r\nfrom invenio.search_engine_utils import get_fieldvalues\r\nimport os\r\n\r\n\r\nif (\'modify\' in curdir) and not os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n try:\r\n content = get_fieldvalues(int(sysno), \'520__b\')[0]\r\n except:\r\n content = \'\'\r\nelif os.path.exists(\"%s/DEMOJRN_ABSE\" % curdir):\r\n content = file(\"%s/DEMOJRN_ABSE\" % curdir).read()\r\nelse:\r\n content = \'\'\r\n\r\ntext = get_html_text_editor(\"DEMOJRN_ABSE\",id=\"BulletinFCKEditor2\", content=content, toolbar_set=\"WebJournal\", width=\'522px\', height=\'700px\', file_upload_url=CFG_SITE_URL + \'/submit/attachfile\', custom_configurations_path=\'/fckeditor/journal-editor-config.js\')\r\n\r\n','2008-09-22','2009-02-23','

    English Article:
',NULL,0);
INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_ENDING',NULL,'','H',NULL,NULL,NULL,NULL,NULL,NULL,'2009-02-06','2009-02-20','
',NULL,0);
-INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_CATEG',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'# Solve usual problem with submit/direct?.. links that bypass \r\n# the comboXXX (category selection) of the submission. Retrieve \r\n# the value, and set it (only in the case of MBI)\r\n\r\nfrom invenio.search_engine import get_fieldvalues\r\n\r\nif \"modify\" in curdir:\r\n try:\r\n comboDEMOJRNfile = file(\"%s/%s\" % (curdir,\'comboDEMOJRN\'), \'w\')\r\n report_number = get_fieldvalues(int(sysno), \'037__a\')[0]\r\n category = report_number.split(\'-\')[1]\r\n comboDEMOJRNfile.write(category)\r\n comboDEMOJRNfile.close()\r\n except:\r\n text = \'\'','2009-10-15','2009-10-15',NULL,NULL,0);
+INSERT INTO sbmFIELDDESC VALUES ('DEMOJRN_CATEG',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'# Solve usual problem with submit/direct?.. links that bypass \r\n# the comboXXX (category selection) of the submission. Retrieve \r\n# the value, and set it (only in the case of MBI)\r\n\r\nfrom invenio.search_engine_utils import get_fieldvalues\r\n\r\nif \"modify\" in curdir:\r\n try:\r\n comboDEMOJRNfile = file(\"%s/%s\" % (curdir,\'comboDEMOJRN\'), \'w\')\r\n report_number = get_fieldvalues(int(sysno), \'037__a\')[0]\r\n category = report_number.split(\'-\')[1]\r\n comboDEMOJRNfile.write(category)\r\n comboDEMOJRNfile.close()\r\n except:\r\n text = \'\'','2009-10-15','2009-10-15',NULL,NULL,0);
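All four DEMOJRN response elements changed above share one idiom: when an existing record is modified, the embedded script pre-fills the form from the record's stored MARC value via get_fieldvalues(int(sysno), tag)[0], falling back to an empty value when the field is absent. A minimal standalone Python sketch of that idiom against the new invenio.search_engine_utils location (the prefill_from_record helper is a hypothetical name for illustration and is not part of this patch):

# Sketch only: the pre-fill idiom used by the DEMOJRN_ABSE/ABSF/ISSUES/CATEG
# element scripts above, assuming the post-move invenio.search_engine_utils module.
from invenio.search_engine_utils import get_fieldvalues

def prefill_from_record(sysno, tag, default=''):
    # get_fieldvalues() returns the list of values stored under the given
    # MARC tag for record `sysno`; an empty list means the record carries
    # no such field, so fall back to the default, as the scripts above do.
    values = get_fieldvalues(int(sysno), tag)
    return values[0] if values else default

# e.g. pre-filling the English article body: prefill_from_record(sysno, '520__b')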
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Mail_Submitter',70,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Print_Success',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Move_Photos_to_Storage',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Insert_Record',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Make_Record',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Move_to_Done',80,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPIC','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Create_Modify_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Is_Original_Submitter',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Move_Photos_to_Storage',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Print_Success_MBI',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Send_Modify_Mail',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPIC','Move_to_Done',90,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Create_Upload_Files_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Move_to_Done',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Print_Success',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Mail_Submitter',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Move_Uploaded_Files_to_Storage',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Is_Original_Submitter',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('SRV','DEMOPIC','Get_Recid',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Move_to_Done',90,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Mail_Submitter',80,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Make_Record',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Insert_Record',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Print_Success',70,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Move_Files_to_Storage',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Stamp_Uploaded_Files',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOTHE','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Create_Modify_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Move_to_Done',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Send_Modify_Mail',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Print_Success_MBI',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Is_Original_Submitter',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOTHE','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Print_Success',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Insert_Record',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Make_Record',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOART','Mail_Submitter',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Create_Modify_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Move_to_Done',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Send_Modify_Mail',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Print_Success_MBI',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Is_Original_Submitter',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOART','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Create_Modify_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Move_to_Done',90,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Send_Modify_Mail',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Print_Success_MBI',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Move_Uploaded_Files_to_Storage',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Is_Original_Submitter',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOBOO','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Test_Status',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Is_Referee',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','CaseEDS',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Send_APP_Mail',90,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Print_Success_APP',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Update_Approval_DB',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Insert_Record',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Print_Success_APP',60,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Send_APP_Mail',70,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Move_From_Pending',20,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Recid',30,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Info',40,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Report_Number',10,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Update_Approval_DB',50,3);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Move_to_Done',80,3);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Move_to_Pending',90,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Print_Success',80,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Send_Approval_Request',70,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Update_Approval_DB',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Mail_Submitter',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Move_Files_to_Storage',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Make_Dummy_MARC_XML_Record',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOBOO','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Make_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Info',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Recid',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Move_From_Pending',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('APP','DEMOBOO','Move_to_Done',100,2);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Make_Record',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Insert_Record',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Print_Success',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Mail_Submitter',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOPOE','Move_to_Done',70,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Is_Original_Submitter',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Create_Modify_Interface',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Move_to_Done',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Print_Success_MBI',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Is_Original_Submitter',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOPOE','Get_Report_Number',10,2);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Move_to_Done',80,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Mail_Submitter',70,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Print_Success',60,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Insert_Record',50,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Make_Record',40,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Move_FCKeditor_Files_to_Storage',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Report_Number_Generation',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('SBI','DEMOJRN','Create_Recid',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Get_Report_Number',10,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Get_Recid',20,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Create_Modify_Interface',30,1);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Move_to_Done',90,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Send_Modify_Mail',80,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Print_Success_MBI',70,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Move_Files_to_Storage',60,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Insert_Modify_Record',50,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Make_Modify_Record',40,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Move_FCKeditor_Files_to_Storage',30,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Get_Recid',20,2);
INSERT INTO sbmFUNCTIONS VALUES ('MBI','DEMOJRN','Get_Report_Number',10,2);
INSERT INTO sbmIMPLEMENT VALUES ('DEMOPIC','SBI','Y','SBIDEMOPIC',1,'2007-09-13','2007-10-04',1,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOPIC','MBI','Y','MBIDEMOPIC',1,'2007-10-04','2007-10-04',2,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOPIC','SRV','Y','SRVDEMOPIC',1,'2009-04-09','2009-04-09',3,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOTHE','SBI','Y','SBIDEMOTHE',1,'2008-03-02','2008-03-05',1,'','1',1,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOTHE','MBI','Y','MBIDEMOTHE',1,'2008-03-05','2008-03-05',2,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOART','SBI','Y','SBIDEMOART',1,'2008-03-06','2008-03-07',1,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOART','MBI','Y','MBIDEMOART',1,'2008-03-07','2008-03-07',2,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOBOO','SBI','Y','SBIDEMOBOO',1,'2008-03-06','2008-03-07',1,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOBOO','MBI','Y','MBIDEMOBOO',1,'2008-03-07','2008-03-07',2,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOBOO','APP','Y','APPDEMOBOO',1,'2002-05-06','2002-05-28',3,'0','0',0,1,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOPOE','SBI','Y','SBIDEMOPOE',2,'2008-03-12','2008-03-12',1,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOPOE','MBI','Y','MBIDEMOPOE',1,'2008-03-12','2008-03-12',2,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOJRN','SBI','Y','SBIDEMOJRN',1,'2008-09-18','2009-02-23',1,'','',0,0,'');
INSERT INTO sbmIMPLEMENT VALUES ('DEMOJRN','MBI','Y','MBIDEMOJRN',1,'2008-09-18','2009-02-23',2,'','',0,0,'');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','authorfile','DEMOPIC_PHOTOG');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','counterpath','lastid_DEMOPIC_categ_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','createTemplate','DEMOPICcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','documenttype','picture');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','edsrn','DEMOPIC_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','fieldnameMBI','DEMOPIC_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','iconsize','180>,700>');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','modifyTemplate','DEMOPICmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','newrnin','NEWRN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','paths_and_suffixes','{\"DEMOPIC_FILE\":\"\"}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','rename','file:DEMOPIC_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','rnformat','DEMO-PICTURE-categ-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','rnin','comboDEMOPIC');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','sourceDoc','photos');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','sourceTemplate','DEMOPIC.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','status','ADDED');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','titleFile','DEMOPIC_TITLE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','doctypes','DEMOPIC_FILE=Picture|Additional=Additional Document');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canReviseDoctypes','*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','maxFilesDoctypes','Additional=1');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canNameNewFiles','1');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canRenameDoctypes','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canCommentDoctypes','*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canAddFormatDoctypes','DEMOPIC_FILE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','showLinks','1');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','keepDefault','1');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','restrictions','=Public|restricted_picture=Private');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canRestrictDoctypes','*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','canDeleteDoctypes','*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','createIconDoctypes','*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPIC','forceFileRevision','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','counterpath','lastid_DEMOTHE_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','edsrn','DEMOTHE_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','rnformat','DEMO-THESIS-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','rnin','comboDEMOTHE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','files_to_be_stamped','DEMOTHE_FILE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','latex_template','demo-stamp-left.tex');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','stamp','first');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','latex_template_vars','{\'REPORTNUMBER\':\'FILE:DEMOTHE_RN\',\'DATE\':\'FILE:DEMOTHE_DATE\'}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','createTemplate','DEMOTHEcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','sourceTemplate','DEMOTHE.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','documenttype','fulltext');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','iconsize','180>,700>');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','paths_and_suffixes','{\"DEMOTHE_FILE\":\"\"}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','rename','file:DEMOTHE_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','newrnin','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','status','ADDED');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','authorfile','DEMOTHE_AU');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','titleFile','DEMOTHE_TITLE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','fieldnameMBI','DEMOTHE_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','modifyTemplate','DEMOTHEmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOTHE','sourceDoc','Thesis');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','authorfile','DEMOART_AU');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','counterpath','lastid_DEMOART_categ_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','createTemplate','DEMOARTcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','documenttype','fulltext');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','edsrn','DEMOART_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','fieldnameMBI','DEMOART_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','iconsize','180');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','modifyTemplate','DEMOARTmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','newrnin','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','paths_and_suffixes','{\"DEMOART_FILE\":\"\"}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','rename','file:DEMOART_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','rnformat','DEMO-categ-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','rnin','comboDEMOART');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','sourceDoc','Textual Document');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','sourceTemplate','DEMOART.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','status','ADDED');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','titleFile','DEMOART_TITLE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOART','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','counterpath','lastid_DEMOBOO_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','edsrn','DEMOBOO_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','rnformat','DEMO-BOOK-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','rnin','comboDEMOBOO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','newrnin','NEWRN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','status','APPROVAL');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','authorfile','DEMOBOO_AU');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','titleFile','DEMOBOO_TITLE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','categformatDAM','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','addressesDAM','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','directory','DEMOBOO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','fieldnameMBI','DEMOBOO_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','modifyTemplate','DEMOBOOmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','sourceTemplate','DEMOBOO.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','sourceDoc','BOOK');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','casevalues','approve,reject');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','casesteps','2,3');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','casevariable','DEMOBOO_DECSN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','casedefault','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','categformatAPP','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','addressesAPP','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','createTemplate','DEMOBOOcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','documenttype','fulltext');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','iconsize','180>,700>');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','paths_and_suffixes','{\"DEMOBOO_FILE\":\"\"}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','rename','file:DEMOBOO_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','dummyrec_source_tpl','DEMOBOO.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','dummyrec_create_tpl','DEMOBOOcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','decision_file','DEMOBOO_DECSN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','comments_file','DEMOBOO_COMNT');
INSERT INTO sbmPARAMETERS VALUES ('DEMOBOO','elementNameToDoctype','DEMOBOO_FILE=DEMOBOO_FILE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','authorfile','DEMOPOE_AU');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','counterpath','lastid_DEMOPOE_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','createTemplate','DEMOPOEcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','edsrn','DEMOPOE_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','fieldnameMBI','DEMOPOE_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','modifyTemplate','DEMOPOEmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','newrnin','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','rnformat','DEMO-POETRY-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','rnin','comboDEMOPOE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','sourceDoc','Poem');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','sourceTemplate','DEMOPOE.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','status','ADDED');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','titleFile','DEMOPOE_TITLE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOPOE','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','record_search_pattern','collection:ATLANTISTIMES*');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','paths_and_suffixes','{\'image\':\"image\", \'file\':\"file\", \'flash\':\"flash\", \'media\':\'media\'}');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','documenttype','picture');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','rename','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','addressesMBI','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','emailFile','SuE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','sourceTemplate','DEMOJRN.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','edsrn','DEMOJRN_RN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','modifyTemplate','DEMOJRNmodify.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','iconsize','300>');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','createTemplate','DEMOJRNcreate.tpl');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','newrnin','');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','status','ADDED');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','titleFile','DEMOJRN_TITLEE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','authorfile','DEMOJRN_AU');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','fieldnameMBI','DEMOJRN_CHANGE');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','sourceDoc','Textual Document');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','autorngen','Y');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','counterpath','lastid_DEMOJRN_categ_yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','rnformat','BUL-categ-yy');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','rnin','comboDEMOJRN');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','yeargen','AUTO');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','input_fields','DEMOJRN_ABSE,DEMOJRN_ABSF');
INSERT INTO sbmPARAMETERS VALUES ('DEMOJRN','files','DEMOJRN_ABSE,DEMOJRN_ABSF');
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (2,'demo_jif','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (15,2,90);
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (3,'citation','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (1,3,10);
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (15,3,80);
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (4,'citerank_citation_t','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (15,4,70);
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (5,'citerank_pagerank_c','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (15,5,60);
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (6,'citerank_pagerank_t','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (15,6,50);
INSERT INTO externalcollection (id, name) VALUES (1, 'Amazon');
INSERT INTO externalcollection (id, name) VALUES (2, 'CERN EDMS');
INSERT INTO externalcollection (id, name) VALUES (3, 'CERN Indico');
INSERT INTO externalcollection (id, name) VALUES (4, 'CERN Intranet');
INSERT INTO externalcollection (id, name) VALUES (5, 'CiteSeer');
INSERT INTO externalcollection (id, name) VALUES (6, 'Google Books');
INSERT INTO externalcollection (id, name) VALUES (7, 'Google Scholar');
INSERT INTO externalcollection (id, name) VALUES (8, 'Google Web');
INSERT INTO externalcollection (id, name) VALUES (9, 'IEC');
INSERT INTO externalcollection (id, name) VALUES (10, 'IHS');
INSERT INTO externalcollection (id, name) VALUES (11, 'INSPEC');
INSERT INTO externalcollection (id, name) VALUES (12, 'ISO');
INSERT INTO externalcollection (id, name) VALUES (13, 'KISS Books/Journals');
INSERT INTO externalcollection (id, name) VALUES (14, 'KISS Preprints');
INSERT INTO externalcollection (id, name) VALUES (15, 'NEBIS');
INSERT INTO externalcollection (id, name) VALUES (16, 'SLAC Library Catalog');
INSERT INTO externalcollection (id, name) VALUES (17, 'SPIRES HEP');
INSERT INTO externalcollection (id, name) VALUES (18, 'Scirus');
INSERT INTO externalcollection (id, name) VALUES (19, 'Atlantis Institute Books');
INSERT INTO externalcollection (id, name) VALUES (20, 'Atlantis Institute Articles');
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (1,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,3,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,5,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,13,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,14,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,17,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,18,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (2,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (3,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (4,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (5,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,3,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,5,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,13,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,14,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,17,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,18,2);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (6,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (7,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (8,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (9,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (10,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (11,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (12,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,17,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,18,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,19,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (13,20,0);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,1,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,2,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,3,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,4,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,5,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,6,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,7,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,8,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,9,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,10,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,11,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,12,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,13,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,14,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,15,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,16,1);
INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,17,1);
INSERT INTO collection_externalcollection
(id_collection,id_externalcollection,type) VALUES (14,18,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,19,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (14,20,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,1,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,2,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,3,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,4,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,5,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,6,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,7,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,8,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,9,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,10,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,11,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,12,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,13,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,14,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,15,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,16,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,17,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,18,2); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,19,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (15,20,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,1,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,2,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,3,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,4,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,5,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,6,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,7,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,8,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,9,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,10,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,11,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,12,1); INSERT 
INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,13,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,14,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,15,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,16,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,17,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,18,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,19,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (16,20,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,1,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,2,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,3,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,4,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,5,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,6,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,7,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,8,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,9,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,10,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,11,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,12,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,13,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,14,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,15,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,16,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,17,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,18,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,19,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (17,20,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,1,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,2,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,3,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,4,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,5,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,6,1); INSERT INTO collection_externalcollection 
(id_collection,id_externalcollection,type) VALUES (18,7,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,8,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,9,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,10,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,11,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,12,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,13,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,14,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,15,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,16,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,17,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,18,1); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,19,0); INSERT INTO collection_externalcollection (id_collection,id_externalcollection,type) VALUES (18,20,0); INSERT INTO knwKB VALUES ('1','DBCOLLID2COLL','DbCollID to Coll name correspondence.', NULL); INSERT INTO knwKB VALUES ('2','EJOURNALS','Knowledge base of all known electronic journals. Useful for reference linking.', NULL); INSERT INTO knwKB VALUES ('3','DBCOLLID2BIBTEX','Mapping between the 980 field and BibTeX entry types.', NULL); INSERT INTO knwKB VALUES ('4','SEARCH-SYNONYM-JOURNAL','Knowledge base of journal title synonyms. Used at search time.', NULL); INSERT INTO knwKB VALUES ('5','INDEX-SYNONYM-TITLE','Knowledge base of title word synonyms. Used at indexing time.', NULL); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('ARTICLE','Published Article', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('PREPRINT','Preprint', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('THESIS','Thesis', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('BOOK','Book', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('REPORT','Report', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('PICTURE','Pictures', '1'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('AAS Photo Bull.','AAS Photo Bull.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Accredit. Qual. Assur.','Accredit. Qual. Assur.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Acoust. Phys.','Acoust. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Acoust. Res. Lett.','Acoust. Res. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Acta Astron.','Acta Astron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Adv. Comput. Math.','Adv. Comput. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Aequ. Math.','Aequ. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Afr. Skies','Afr. Skies', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Algorithmica','Algorithmica', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Am. J. Phys.','Am. J. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Ann. Phys.','Ann.
Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Annu. Rev. Astron. Astrophys.','Annu. Rev. Astron. Astrophys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Annu. Rev. Earth Planet. Sci.','Annu. Rev. Earth Planet. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Appl. Phys. Lett.','Appl. Phys. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Appl. Phys., A','Appl. Phys., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Appl. Phys., B','Appl. Phys., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Appl. Radiat. Isot.','Appl. Radiat. Isot.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Appl. Surf. Sci.','Appl. Surf. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Arch. Appl. Mech.','Arch. Appl. Mech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Arch. Envir. Contam. Toxicol.','Arch. Envir. Contam. Toxicol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Arch. Rational Mech. Analys.','Arch. Rational Mech. Analys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Astrophys. Rev.','Astron. Astrophys. Rev.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Astrophys.','Astron. Astrophys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Astrophys., Suppl.','Astron. Astrophys., Suppl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. J.','Astron. J.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Lett.','Astron. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Nachr.','Astron. Nachr.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astron. Rep.','Astron. Rep.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astropart. Phys.','Astropart. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astrophys. J.','Astrophys. J.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Astrophys. Norvegica','Astrophys. Norvegica', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Balt. Astron.','Balt. Astron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bioimaging','Bioimaging', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Biol. Cybern.','Biol. Cybern.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bull. Astron. Belgrade','Bull. Astron. Belgrade', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bull. Astron. Inst. Czech.','Bull. Astron. Inst. Czech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bull. Astron. Soc. India','Bull. Astron. Soc. India', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bull. Eng. Geol. Environ.','Bull. Eng. Geol. Environ.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Bull. Environ. Contam. Toxicol.','Bull. Environ. Contam. Toxicol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Calc. Var. Partial Differ. Equ.','Calc. Var. Partial Differ. Equ.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chaos','Chaos', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chaos Solitons Fractals','Chaos Solitons Fractals', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chem. Phys.','Chem. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chem. Phys. Lett.','Chem. Phys. 
Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chin. Astron. Astrophys.','Chin. Astron. Astrophys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Chin. J. Astron. Astrophys.','Chin. J. Astron. Astrophys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Class. Quantum Gravity','Class. Quantum Gravity', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Clim. Dyn.','Clim. Dyn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Colloid Polym. Sci.','Colloid Polym. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Combinatorica','Combinatorica', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Combust. Theory Model.','Combust. Theory Model.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Commun. Math. Phys.','Commun. Math. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comment. Math. Helv.','Comment. Math. Helv.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comput. Mech.','Comput. Mech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comput. Phys.','Comput. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comput. Phys. Commun.','Comput. Phys. Commun.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comput. Sci. Eng.','Comput. Sci. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Comput. Vis. Sci.','Comput. Vis. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Computing','Computing', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Constr. Approx.','Constr. Approx.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Contin. Mech. Thermodyn.','Contin. Mech. Thermodyn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Contrib. Astron. Obs. Skaln. Pleso','Contrib. Astron. Obs. Skaln. Pleso', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Contrib. Astron. Obs. Skaln. Pleso Suppl.','Contrib. Astron. Obs. Skaln. Pleso Suppl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Cryogenics','Cryogenics', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Crystallogr. Rep.','Crystallogr. Rep.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Curr. Appl. Phys.','Curr. Appl. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Curr. Opin. Solid State Mater. Sci.','Curr. Opin. Solid State Mater. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Discret. Comput. Geom.','Discret. Comput. Geom.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Displays','Displays', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Distrib. Comput.','Distrib. Comput.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Distrib. Syst. Eng.','Distrib. Syst. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Dokl. Phys.','Dokl. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Electrochem. Solid State Lett.','Electrochem. Solid State Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Electron. Lett.','Electron. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Elem. Math.','Elem. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Environ. Geol.','Environ. Geol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Environ. Manage.','Environ. 
Manage.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Biophys. J. Biophys. Lett.','Eur. Biophys. J. Biophys. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. J. Phys.','Eur. J. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., A','Eur. Phys. J., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., Appl. Phys.','Eur. Phys. J., Appl. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., B','Eur. Phys. J., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., C','Eur. Phys. J., C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., D','Eur. Phys. J., D', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Eur. Phys. J., E','Eur. Phys. J., E', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Europhys. Lett.','Europhys. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Europhys. News','Europhys. News', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Exp. Fluids','Exp. Fluids', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Few-Body Syst.','Few-Body Syst.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Finan. Stoch.','Finan. Stoch.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Fluid Dyn. Res.','Fluid Dyn. Res.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Geom. Funct. Anal.','Geom. Funct. Anal.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Heat Mass Transf.','Heat Mass Transf.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('High Energy Phys. Libr. Webzine','High Energy Phys. Libr. Webzine', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('High Perform. Polym.','High Perform. Polym.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Circ. Devices Syst.','IEE Proc., Circ. Devices Syst.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Commun.','IEE Proc., Commun.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Comput. Digit. Tech.','IEE Proc., Comput. Digit. Tech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Control Theory Appl.','IEE Proc., Control Theory Appl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Electr. Power Appl.','IEE Proc., Electr. Power Appl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Gener. Transm. Distrib.','IEE Proc., Gener. Transm. Distrib.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Microw. Antennas Propag.','IEE Proc., Microw. Antennas Propag.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Optoelectron.','IEE Proc., Optoelectron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Radar, Sonar Navig.','IEE Proc., Radar, Sonar Navig.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Sci. Meas. Technol.','IEE Proc., Sci. Meas. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Softw. Eng.','IEE Proc., Softw. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('IEE Proc., Vis. Image Signal Process.','IEE Proc., Vis. Image Signal Process.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Image Vis. Comput.','Image Vis. 
Comput.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Inform. Forsch. Entwickl.','Inform. Forsch. Entwickl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Inform. Spektr.','Inform. Spektr.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Infrared Phys. Technol.','Infrared Phys. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Int. J. Digit. Libr.','Int. J. Digit. Libr.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Int. J. Doc. Anal. Recogn.','Int. J. Doc. Anal. Recogn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Int. J. Nonlinear Mech.','Int. J. Nonlinear Mech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Int. J. Softw. Tools Technol. Transf.','Int. J. Softw. Tools Technol. Transf.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Invent. Math.','Invent. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Inverse Probl.','Inverse Probl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Acoust. Soc. Am.','J. Acoust. Soc. Am.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Aerosp. Eng.','J. Aerosp. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Alloys. Compounds','J. Alloys. Compounds', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Am. Assoc. Var. Star Obs.','J. Am. Assoc. Var. Star Obs.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Appl. Mech.','J. Appl. Mech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Appl. Phys.','J. Appl. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Atmos. Solar Terrest. Phys.','J. Atmos. Solar Terrest. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Br. Astron. Assoc.','J. Br. Astron. Assoc.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Chem. Phys.','J. Chem. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Classif.','J. Classif.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Comput. Inf. Sci. Eng.','J. Comput. Inf. Sci. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Constr. Eng. Manage.','J. Constr. Eng. Manage.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Cryptol.','J. Cryptol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Cryst. Growth','J. Cryst. Growth', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Dyn. Syst. Meas. Control','J. Dyn. Syst. Meas. Control', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Electrochem. Soc.','J. Electrochem. Soc.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Electron Spectrosc. Relat. Phen.','J. Electron Spectrosc. Relat. Phen.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Electron. Imaging','J. Electron. Imaging', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Electron. Packag.','J. Electron. Packag.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Energy Eng.','J. Energy Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Energy Resour. Technol.','J. Energy Resour. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Eng. Mater. Technol.','J. Eng. Mater. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Eng. Mech.','J. Eng. 
Mech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Environ. Eng.','J. Environ. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Exp. Theor. Phys., JETP','J. Exp. Theor. Phys., JETP', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Fluids Eng.','J. Fluids Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Geom. Phys.','J. Geom. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Heat Transf.','J. Heat Transf.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. High Energy Phys.','J. High Energy Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Korean Astron. Soc.','J. Korean Astron. Soc.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Lumin.','J. Lumin.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Magn. Magn. Mater.','J. Magn. Magn. Mater.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Manage. Eng.','J. Manage. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Manuf. Sci. Eng.','J. Manuf. Sci. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Mater. Civ. Eng.','J. Mater. Civ. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Math. Biol.','J. Math. Biol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Math. Phys.','J. Math. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Mech. Des.','J. Mech. Des.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Micromech. Microeng.','J. Micromech. Microeng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Opt.','J. Opt.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys., A','J. Phys., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys., B','J. Phys., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys., Condens. Matter','J. Phys., Condens. Matter', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys., D','J. Phys., D', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys., G','J. Phys., G', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys. I','J. Phys. I', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys. II','J. Phys. II', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys. III','J. Phys. III', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys. Chem. Ref. Data','J. Phys. Chem. Ref. Data', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Phys. Chem. Solids','J. Phys. Chem. Solids', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Quant. Spectrosc. Radiat. Transf.','J. Quant. Spectrosc. Radiat. Transf.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. R. Astron. Soc. Can.','J. R. Astron. Soc. Can.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Radio. Prot.','J. Radio. Prot.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Rheol.','J. Rheol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Solar Energy Eng.','J. Solar Energy Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Solid State Electrochem.','J. Solid State Electrochem.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Struct. Eng.','J. Struct. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Surv. Eng.','J. Surv. 
Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Tribol.','J. Tribol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Turbomach.','J. Turbomach.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Vac. Sci. Technol.','J. Vac. Sci. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Vac. Sci. Technol., A','J. Vac. Sci. Technol., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Vac. Sci. Technol., B','J. Vac. Sci. Technol., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('J. Vib. Acoust.','J. Vib. Acoust.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('JETP','JETP', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('JETP Lett.','JETP Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Low Temp. Phys.','Low Temp. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Mach. Vis. Appl.','Mach. Vis. Appl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Mater. Res. Innov.','Mater. Res. Innov.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Mater. Sci. Eng., B','Mater. Sci. Eng., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Math. Ann.','Math. Ann.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Math. Model. Numer. Anal.','Math. Model. Numer. Anal.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Math. Program.','Math. Program.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Math. Z.','Math. Z.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Meas. Sci. Technol.','Meas. Sci. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Med. Phys.','Med. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Meteorit. Planet. Sci.','Meteorit. Planet. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Microelectron. Eng.','Microelectron. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Micron','Micron', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Microsc. Microanal.','Microsc. Microanal.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Microsyst. Technol.','Microsyst. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Mon. Not. R. Astron. Soc.','Mon. Not. R. Astron. Soc.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Multim. Syst.','Multim. Syst.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nanotech.','Nanotech.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Naturwiss.','Naturwiss.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Network','Network', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('New Astron.','New Astron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('New Astron. Rev.','New Astron. Rev.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nonlinearity','Nonlinearity', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nucl. Instrum. Methods Phys. Res., A','Nucl. Instrum. Methods Phys. Res., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nucl. Instrum. Methods Phys. Res., B','Nucl. Instrum. Methods Phys. Res., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nucl. Phys. B, Proc. Suppl.','Nucl. Phys. B, Proc. Suppl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nucl. Phys., A','Nucl. 
Phys., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nucl. Phys., B','Nucl. Phys., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Num. Math.','Num. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nuovo Cimento, A','Nuovo Cimento, A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nuovo Cimento, B','Nuovo Cimento, B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nuovo Cimento, C','Nuovo Cimento, C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Nuovo Cimento, D','Nuovo Cimento, D', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Obs.','Obs.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Opt. Commun.','Opt. Commun.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Opt. Eng.','Opt. Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Opt. Lasers Eng.','Opt. Lasers Eng.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Opt. Mater.','Opt. Mater.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Opt. Spectrosc.','Opt. Spectrosc.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. At. Nucl.','Phys. At. Nucl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Chem. Miner.','Phys. Chem. Miner.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Educ.','Phys. Educ.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Fluids','Phys. Fluids', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Fluids, A','Phys. Fluids, A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Fluids, B','Phys. Fluids, B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Lett., A','Phys. Lett., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Lett., B','Phys. Lett., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Med. Biol.','Phys. Med. Biol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Part. Nucl.','Phys. Part. Nucl.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Plasmas','Phys. Plasmas', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rep.','Phys. Rep.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., A','Phys. Rev., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., B','Phys. Rev., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., C','Phys. Rev., C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., D','Phys. Rev., D', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., E','Phys. Rev., E', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev., ser. 1','Phys. Rev., ser. 1', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev. Lett.','Phys. Rev. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev. Spec. Top. Accel. Beams','Phys. Rev. Spec. Top. Accel. Beams', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Rev.','Phys. Rev.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys. Solid State','Phys. 
Solid State', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physica, A','Physica, A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physica, B','Physica, B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physica, C','Physica, C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physica, D','Physica, D', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physica, E','Physica, E', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Physiol. Meas.','Physiol. Meas.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Planet. Space Sci.','Planet. Space Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Plasma Phys. Control. Fusion','Plasma Phys. Control. Fusion', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Plasma Phys. Rep.','Plasma Phys. Rep.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Plasma Sources Sci. Technol.','Plasma Sources Sci. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Polym. Bull.','Polym. Bull.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Powder Diffraction','Powder Diffraction', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Probab. Theory Relat. Fields','Probab. Theory Relat. Fields', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Proc. Astron. Soc. Aust.','Proc. Astron. Soc. Aust.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Proc. Nat. Acad. Sci.','Proc. Nat. Acad. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Prog. Cryst. Growth Charact. Mat.','Prog. Cryst. Growth Charact. Mat.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Prog. Part. Nucl. Phys.','Prog. Part. Nucl. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Prog. Quantum Electron.','Prog. Quantum Electron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Prog. Surf. Sci.','Prog. Surf. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Program','Program', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Publ. Astron. Soc. Aust.','Publ. Astron. Soc. Aust.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Publ. Astron. Soc. Jpn.','Publ. Astron. Soc. Jpn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Publ. Astron. Soc. Pac.','Publ. Astron. Soc. Pac.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Publ. Underst. Sci.','Publ. Underst. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Pure Appl. Opt.: J. Eur. Opt. Soc. P. A','Pure Appl. Opt.: J. Eur. Opt. Soc. P. A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Quantum Semiclass. Opt.: J. Eur. Opt. Soc. P. B','Quantum Semiclass. Opt.: J. Eur. Opt. Soc. P. B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Radiat. Environ. Biophys.','Radiat. Environ. Biophys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Radiat. Meas.','Radiat. Meas.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Radiat. Phys. Chem.','Radiat. Phys. Chem.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Radiologe','Radiologe', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Radioprotection','Radioprotection', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Rep. Math. Phys.','Rep. Math. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Rep. Prog. Phys.','Rep. Prog. 
Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Res. Exp. Med.','Res. Exp. Med.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Rev. Mex. Astron. Astrofis.','Rev. Mex. Astron. Astrofis.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Rev. Mod. Phys.','Rev. Mod. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Rev. Sci. Instrum.','Rev. Sci. Instrum.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Sel. Math., New Ser.','Sel. Math., New Ser.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Semicond.','Semicond.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Semicond. Sci. Technol.','Semicond. Sci. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Shock Waves','Shock Waves', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM J. Appl. Math.','SIAM J. Appl. Math.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM J. Comput.','SIAM J. Comput.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM J. Math. Anal.','SIAM J. Math. Anal.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM J. Numer. Anal.','SIAM J. Numer. Anal.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM J. Optim.','SIAM J. Optim.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('SIAM Rev.','SIAM Rev.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Smart Mat. Struct.','Smart Mat. Struct.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Soft Comput.','Soft Comput.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Softw. Concepts Tools','Softw. Concepts Tools', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Solar Phys.','Solar Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Solid State Commun.','Solid State Commun.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Solid State Electron.','Solid State Electron.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Solid State Ion.','Solid State Ion.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Sov. Astron. Lett.','Sov. Astron. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Superconductor Science and Technology','Superconductor Science and Technology', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Surf. Coatings Techn.','Surf. Coatings Techn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Surf. Sci.','Surf. Sci.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Surf. Sci. Rep.','Surf. Sci. Rep.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Surf. Sci. Spectra','Surf. Sci. Spectra', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Synth. Metals','Synth. Metals', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Syst. Fam.','Syst. Fam.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Tech. Phys.','Tech. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Tech. Phys. Lett.','Tech. Phys. Lett.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Theor. Comput. Fluid Dyn.','Theor. Comput. Fluid Dyn.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Theory Comput. Syst.','Theory Comput. Syst.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Thin Solid Films','Thin Solid Films', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Tribol. 
Int.','Tribol. Int.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Ultramicroscopy','Ultramicroscopy', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Vacuum','Vacuum', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('VLDB J.','VLDB J.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Virtual J. Nanoscale Sci. Technol.','Virtual J. Nanoscale Sci. Technol.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Virtual J. Biol. Phys. Res.','Virtual J. Biol. Phys. Res.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Vis. Comput.','Vis. Comput.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Wave Motion','Wave Motion', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Waves Random Media','Waves Random Media', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Wear','Wear', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Z. Angew. Math. Phys.','Z. Angew. Math. Phys.', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Z. Phys., A','Z. Phys., A', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Z. Phys., B','Z. Phys., B', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Z. Phys., C','Z. Phys., C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Zphys-e.C','Zphys-e.C', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('ATLAS eNews','ATLAS eNews', '2'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('PICTURE','unpublished', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('PREPRINT','techreport', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('ARTICLE','article', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('REPORT','techreport', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('BOOK','book', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('THESIS','phdthesis', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('POETRY','unpublished', '3'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('PHRVD','Phys. Rev., D', '4'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('Phys.Rev.D','Phys. 
Rev., D', '4'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('beta','β', '5'); INSERT INTO knwKBRVAL (m_key,m_value,id_knwKB) VALUES ('β','beta', '5'); -- crcLIBRARY demo data: INSERT INTO crcLIBRARY (name, address, email, phone, notes) VALUES ('Atlantis Main Library', 'CH-1211 Geneva 23', 'atlantis@cds.cern.ch', '1234567', ''); INSERT INTO crcLIBRARY (name, address, email, phone, notes) VALUES ('Atlantis HEP Library', 'CH-1211 Geneva 21', 'atlantis.hep@cds.cern.ch', '1234567', ''); -- crcITEM demo data: INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-34001', '34', '1', '', 'ABC-123', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-34002', '34', '2', '', 'HEP-12A', 'Book', '4 weeks', 'requested', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-33001', '33', '1', '', 'AZ.12-AK', 'Book', '4 weeks', 'on loan', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-32001', '32', '1', 'Reference', 'WDFG-54', 'Book', 'Not for loan', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-32002', '32', '2', '', 'RZ.612-MK', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-32003', '32', '1', '', 'RT-4654-E', 'Book', '4 weeks', 'missing', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-31001', '31', '2', '', '123LSKD', 'Book', '1 week', 'on loan', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-31002', '31', '1', '', 'QSQ452-S', 'Book', '1 week', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-30001', '30', '1', 'Reference', 'QSQS-52-S', 'Book', 'Not for loan', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-29001', '29', '1', '', 'AZD456-465', 'Book', '4 weeks', 'requested', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, 
id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-28001', '28', '1', '', 'AZD5-456', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-27001', '27', '2', '', 'JLMQ-45-SQ', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-26001', '26', '1', '', 'AZD456-465', 'Book', '1 week', 'missing', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-25001', '25', '2', '', 'AGT-MLL5', 'Book', '4 weeks', 'on loan', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-24001', '24', '2', '', 'J56-475', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-23001', '23', '1', '', 'JHL-465.DS', 'Book', '4 weeks', 'requested', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-22001', '22', '1', '', 'AZD4E-865', 'Book', '1 week', 'requested', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); INSERT INTO crcITEM (barcode, id_bibrec, id_crcLIBRARY, collection, location, description, loan_period, status, creation_date, modification_date, number_of_requests) VALUES ('bc-21001', '21', '2', '', 'MLL-DS.63', 'Book', '4 weeks', 'available', '2008-07-21 00:00:00', '2008-07-21 00:00:00', '0'); -- crcLOAN demo data: INSERT INTO crcLOAN (id_crcBORROWER, id_bibrec, barcode, loaned_on, due_date, status, type, notes) VALUES ('4', '33', 'bc-33001', NOW(), NOW() + INTERVAL 30 DAY, 'on loan' ,'normal', ''); INSERT INTO crcLOAN (id_crcBORROWER, id_bibrec, barcode, loaned_on, due_date, status, type, notes) VALUES ('5', '31', 'bc-31001', NOW(), NOW() + INTERVAL 7 DAY, 'on loan' ,'normal', ''); INSERT INTO crcLOAN (id_crcBORROWER, id_bibrec, barcode, loaned_on, due_date, status, type, notes) VALUES ('5', '25', 'bc-25001', NOW(), NOW() + INTERVAL 30 DAY, 'on loan' ,'normal', ''); -- crcLOANREQUEST demo data: INSERT INTO crcLOANREQUEST (id_crcBORROWER, id_bibrec, barcode, period_of_interest_from, period_of_interest_to, status, notes, request_date) VALUES ('5', '34', 'bc-34002', NOW(), NOW() + INTERVAL 60 DAY, 'pending' , '', NOW()); INSERT INTO crcLOANREQUEST (id_crcBORROWER, id_bibrec, barcode, period_of_interest_from, period_of_interest_to, status, notes, request_date) VALUES ('6', '29', 'bc-29001', NOW(), NOW() + INTERVAL 45 DAY, 'pending' , '', NOW()); INSERT INTO crcLOANREQUEST (id_crcBORROWER, id_bibrec, barcode, period_of_interest_from, period_of_interest_to, status, notes, request_date) VALUES
('5', '33', 'bc-33001', NOW(), NOW() + INTERVAL 45 DAY, 'waiting' , '', NOW()); INSERT INTO crcLOANREQUEST (id_crcBORROWER, id_bibrec, barcode, period_of_interest_from, period_of_interest_to, status, notes, request_date) VALUES ('7', '22', 'bc-22001', NOW(), NOW() + INTERVAL 90 DAY, 'pending' , '', NOW()); -- crcBORROWER demo data: INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Admin', 'admin@cds.cern.ch', '20003', '99-Z-019', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Jekyll', 'jekyll@cds.cern.ch', '01234', '21-Z-019', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Hyde', 'Hyde@cds.cern.ch', '01574', '22-Z-119', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Dorian Gray', 'dorian.gray@cds.cern.ch', '33234', '38-Y-819', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Romeo Montague', 'romeo.montague@cds.cern.ch', '93844', '98-W-859', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Juliet Capulet', 'juliet.capulet@cds.cern.ch', '99874', '91-X-098', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Benvolio Montague', 'benvolio.montague@cds.cern.ch', '32354', '93-P-019', '2008-07-21 00:00:00', ''); INSERT INTO crcBORROWER (name, email, phone, address, borrower_since, notes) VALUES ('Balthasar Montague', 'balthasar.montague@cds.cern.ch', '78644', '20-M-349', '2008-07-21 00:00:00', ''); -- switch on stemming for some indexes: UPDATE idxINDEX SET stemming_language='en' WHERE name IN ('global','abstract','keyword','title','fulltext'); -- exporting demo: INSERT INTO expJOB (jobname) VALUES ('sitemap'); INSERT INTO expJOB (jobname) VALUES ('googlescholar'); INSERT INTO expJOB (jobname) VALUES ('marcxml'); -- WebJournal demo: INSERT INTO jrnJOURNAL (id,name) VALUES(1,'AtlantisTimes'); INSERT INTO jrnISSUE (id_jrnJOURNAL,issue_number,issue_display,date_released,date_announced) VALUES (1,'02/2009','02-03/2009','2009-01-09','2009-01-09'); INSERT INTO jrnISSUE (id_jrnJOURNAL,issue_number,issue_display,date_released) VALUES (1,'03/2009','02-03/2009','2009-01-16'); -- BibAuthorID demo person assignment: INSERT INTO aidPERSONID VALUES (NULL,1,'uid','2','0','0'); INSERT INTO aidPERSONID VALUES (NULL,2,'uid','1','0','0'); INSERT INTO aidPERSONID VALUES (NULL,3,'uid','4','0','0'); INSERT INTO aidPERSONID VALUES (NULL,4,'uid','5','0','0'); INSERT INTO aidPERSONID VALUES (NULL,5,'uid','6','0','0'); INSERT INTO aidPERSONID VALUES (NULL,6,'uid','7','0','0'); INSERT INTO aidPERSONID VALUES (NULL,7,'uid','8','0','0'); -- end of file diff --git a/modules/webcomment/lib/webcomment.py b/modules/webcomment/lib/webcomment.py index e6229bdc8..01cf398bd 100644 --- a/modules/webcomment/lib/webcomment.py +++ b/modules/webcomment/lib/webcomment.py @@ -1,2002 +1,2002 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Comments and reviews for records """ __revision__ = "$Id$" # non Invenio imports: import time import math import os import cgi import re from datetime import datetime, timedelta # Invenio imports: from invenio.dbquery import run_sql from invenio.config import CFG_PREFIX, \ CFG_SITE_LANG, \ CFG_WEBALERT_ALERT_ENGINE_EMAIL,\ CFG_SITE_SUPPORT_EMAIL,\ CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,\ CFG_SITE_URL,\ CFG_SITE_NAME,\ CFG_WEBCOMMENT_ALLOW_REVIEWS,\ CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS,\ CFG_WEBCOMMENT_ALLOW_COMMENTS,\ CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL,\ CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN,\ CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS,\ CFG_WEBCOMMENT_DEFAULT_MODERATOR, \ CFG_SITE_RECORD from invenio.webmessage_mailutils import \ email_quote_txt, \ email_quoted_txt2html from invenio.htmlutils import tidy_html from invenio.webuser import get_user_info, get_email, collect_user_info from invenio.dateutils import convert_datetext_to_dategui, \ datetext_default, \ convert_datestruct_to_datetext from invenio.mailutils import send_email from invenio.errorlib import register_exception from invenio.messages import wash_language, gettext_set_language from invenio.urlutils import wash_url_argument from invenio.webcomment_config import CFG_WEBCOMMENT_ACTION_CODE, \ CFG_WEBCOMMENT_EMAIL_REPLIES_TO, \ CFG_WEBCOMMENT_ROUND_DATAFIELD, \ CFG_WEBCOMMENT_RESTRICTION_DATAFIELD, \ CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH, \ InvenioWebCommentError, \ InvenioWebCommentWarning from invenio.access_control_engine import acc_authorize_action from invenio.search_engine import \ guess_primary_collection_of_a_record, \ check_user_can_view_record, \ - get_fieldvalues, \ get_collection_reclist, \ get_colID +from invenio.search_engine_utils import get_fieldvalues from invenio.webcomment_washer import EmailWasher try: import invenio.template webcomment_templates = invenio.template.load('webcomment') except: pass def perform_request_display_comments_or_remarks(req, recID, display_order='od', display_since='all', nb_per_page=100, page=1, ln=CFG_SITE_LANG, voted=-1, reported=-1, subscribed=0, reviews=0, uid=-1, can_send_comments=False, can_attach_files=False, user_is_subscribed_to_discussion=False, user_can_unsubscribe_from_discussion=False, display_comment_rounds=None): """ Returns all the comments (reviews) of a specific internal record or external basket record. 
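(Illustrative usage, not from the original docstring; argument values are made up: perform_request_display_comments_or_remarks(req, recID=42, display_order='nd', reviews=0) would return the HTML of the discussion page of record 42 with the comments sorted newest first.)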
@param recID: record id where (internal record IDs > 0) or (external basket record IDs < -100) @param display_order: hh = highest helpful score, review only lh = lowest helpful score, review only hs = highest star score, review only ls = lowest star score, review only od = oldest date nd = newest date @param display_since: all= no filtering by date nd = n days ago nw = n weeks ago nm = n months ago ny = n years ago where n is a single digit integer between 0 and 9 @param nb_per_page: number of results per page @param page: results page @param voted: boolean, active if user voted for a review, see perform_request_vote function @param reported: boolean, active if user reported a certain comment/review, see perform_request_report function @param subscribed: int, 1 if user just subscribed to discussion, -1 if unsubscribed @param reviews: boolean, enabled if reviews, disabled for comments @param uid: the id of the user who is reading comments @param can_send_comments: if user can send comment or not @param can_attach_files: if user can attach file to comment or not @param user_is_subscribed_to_discussion: True if user already receives new comments by email @param user_can_unsubscribe_from_discussion: True if user is allowed to unsubscribe from discussion @return html body. """ _ = gettext_set_language(ln) warnings = [] nb_reviews = 0 nb_comments = 0 # wash arguments recID = wash_url_argument(recID, 'int') ln = wash_language(ln) display_order = wash_url_argument(display_order, 'str') display_since = wash_url_argument(display_since, 'str') nb_per_page = wash_url_argument(nb_per_page, 'int') page = wash_url_argument(page, 'int') voted = wash_url_argument(voted, 'int') reported = wash_url_argument(reported, 'int') reviews = wash_url_argument(reviews, 'int') # vital argument check (valid, error_body) = check_recID_is_in_range(recID, warnings, ln) if not(valid): return error_body # CERN hack begins: filter out ATLAS comments from invenio.config import CFG_CERN_SITE if CFG_CERN_SITE: restricted_comments_p = False for report_number in get_fieldvalues(recID, '088__a'): if report_number.startswith("ATL-"): restricted_comments_p = True break if restricted_comments_p: err_code, err_msg = acc_authorize_action(uid, 'viewrestrcoll', collection='ATLAS Communications') if err_code: return err_msg # CERN hack ends # Query the database and filter results user_info = collect_user_info(uid) res = query_retrieve_comments_or_remarks(recID, display_order, display_since, reviews, user_info=user_info) res2 = query_retrieve_comments_or_remarks(recID, display_order, display_since, not reviews, user_info=user_info) nb_res = len(res) if reviews: nb_reviews = nb_res nb_comments = len(res2) else: nb_reviews = len(res2) nb_comments = nb_res # checking non-vital arguments - will be set to default if wrong #if page <= 0 or page.lower() != 'all': if page < 0: page = 1 try: raise InvenioWebCommentWarning(_('Bad page number --> showing first page.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_INVALID_PAGE_NB',)) if nb_per_page < 0: nb_per_page = 100 try: raise InvenioWebCommentWarning(_('Bad number of results per page --> showing 100 results per page.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_INVALID_NB_RESULTS_PER_PAGE',)) if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews: if display_order not in ['od', 'nd', 'hh', 'lh',
'hs', 'ls']: display_order = 'hh' try: raise InvenioWebCommentWarning(_('Bad display order --> showing most helpful first.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_INVALID_REVIEW_DISPLAY_ORDER',)) else: if display_order not in ['od', 'nd']: display_order = 'od' try: raise InvenioWebCommentWarning(_('Bad display order --> showing oldest first.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_INVALID_DISPLAY_ORDER',)) if not display_comment_rounds: display_comment_rounds = [] # filter results according to page and number of results per page if nb_per_page > 0: if nb_res > 0: last_page = int(math.ceil(nb_res / float(nb_per_page))) else: last_page = 1 if page > last_page: page = 1 try: raise InvenioWebCommentWarning(_('Bad page number --> showing first page.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(("WRN_WEBCOMMENT_INVALID_PAGE_NB",)) if nb_res > nb_per_page: # if more than one page of results if page < last_page: res = res[(page-1)*(nb_per_page) : (page*nb_per_page)] else: res = res[(page-1)*(nb_per_page) : ] else: # one page of results pass else: last_page = 1 # Send to template avg_score = 0.0 if not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS: # comments not allowed by admin try: raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.')) except InvenioWebCommentError, exc: register_exception(req=req) body = webcomment_templates.tmpl_error(exc.message, ln) return body # errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',)) if reported > 0: try: raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',)) elif reported == 0: try: raise InvenioWebCommentWarning(_('You have already reported an abuse for this comment.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_ALREADY_REPORTED',)) elif reported == -2: try: raise InvenioWebCommentWarning(_('The comment you have reported no longer exists.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_INVALID_REPORT',)) if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews: avg_score = calculate_avg_score(res) if voted > 0: try: raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',)) elif voted == 0: try: raise InvenioWebCommentWarning(_('Sorry, you have already voted. This vote has not been recorded.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_ALREADY_VOTED',)) if subscribed == 1: try: raise InvenioWebCommentWarning(_('You have been subscribed to this discussion.
From now on, you will receive an email whenever a new comment is posted.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_SUBSCRIBED',)) elif subscribed == -1: try: raise InvenioWebCommentWarning(_('You have been unsubscribed from this discussion.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning', req=req) warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_UNSUBSCRIBED',)) grouped_comments = group_comments_by_round(res, reviews) # Clean list of comments round names if not display_comment_rounds: display_comment_rounds = [] elif 'all' in display_comment_rounds: display_comment_rounds = [cmtgrp[0] for cmtgrp in grouped_comments] elif 'latest' in display_comment_rounds: if grouped_comments: display_comment_rounds.append(grouped_comments[-1][0]) display_comment_rounds.remove('latest') body = webcomment_templates.tmpl_get_comments(req, recID, ln, nb_per_page, page, last_page, display_order, display_since, CFG_WEBCOMMENT_ALLOW_REVIEWS, grouped_comments, nb_comments, avg_score, warnings, border=0, reviews=reviews, total_nb_reviews=nb_reviews, uid=uid, can_send_comments=can_send_comments, can_attach_files=can_attach_files, user_is_subscribed_to_discussion=\ user_is_subscribed_to_discussion, user_can_unsubscribe_from_discussion=\ user_can_unsubscribe_from_discussion, display_comment_rounds=display_comment_rounds) return body def perform_request_vote(cmt_id, client_ip_address, value, uid=-1): """ Vote positively or negatively for a comment/review @param cmt_id: review id @param value: +1 for voting positively -1 for voting negatively @return: integer 1 if successful, integer 0 if not """ cmt_id = wash_url_argument(cmt_id, 'int') client_ip_address = wash_url_argument(client_ip_address, 'str') value = wash_url_argument(value, 'int') uid = wash_url_argument(uid, 'int') if cmt_id > 0 and value in [-1, 1] and check_user_can_vote(cmt_id, client_ip_address, uid): action_date = convert_datestruct_to_datetext(time.localtime()) action_code = CFG_WEBCOMMENT_ACTION_CODE['VOTE'] query = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT, id_bibrec, id_user, client_host, action_time, action_code) VALUES (%s, NULL ,%s, inet_aton(%s), %s, %s)""" params = (cmt_id, uid, client_ip_address, action_date, action_code) run_sql(query, params) return query_record_useful_review(cmt_id, value) else: return 0 def check_user_can_comment(recID, client_ip_address, uid=-1): """ Check if a user hasn't already commented within the last seconds time limit: CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS @param recID: record id @param client_ip_address: IP => use: str(req.remote_ip) @param uid: user id, as given by invenio.webuser.getUid(req) """ recID = wash_url_argument(recID, 'int') client_ip_address = wash_url_argument(client_ip_address, 'str') uid = wash_url_argument(uid, 'int') max_action_time = time.time() - CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS max_action_time = convert_datestruct_to_datetext(time.localtime(max_action_time)) action_code = CFG_WEBCOMMENT_ACTION_CODE['ADD_COMMENT'] query = """SELECT id_bibrec FROM cmtACTIONHISTORY WHERE id_bibrec=%s AND action_code=%s AND action_time>%s """ params = (recID, action_code, max_action_time) if uid < 0: query += " AND client_host=inet_aton(%s)" params += (client_ip_address,) else: query += " AND id_user=%s" params += (uid,) res = run_sql(query, params) return len(res) == 0 def 
check_user_can_review(recID, client_ip_address, uid=-1): """ Check if a user has not already reviewed this record: only one review per user and record is allowed (note that, unlike check_user_can_comment, no time window based on CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS is applied here) @param recID: record ID @param client_ip_address: IP => use: str(req.remote_ip) @param uid: user id, as given by invenio.webuser.getUid(req) """ action_code = CFG_WEBCOMMENT_ACTION_CODE['ADD_REVIEW'] query = """SELECT id_bibrec FROM cmtACTIONHISTORY WHERE id_bibrec=%s AND action_code=%s """ params = (recID, action_code) if uid < 0: query += " AND client_host=inet_aton(%s)" params += (client_ip_address,) else: query += " AND id_user=%s" params += (uid,) res = run_sql(query, params) return len(res) == 0 def check_user_can_vote(cmt_id, client_ip_address, uid=-1): """ Checks if a user hasn't already voted @param cmt_id: comment id @param client_ip_address: IP => use: str(req.remote_ip) @param uid: user id, as given by invenio.webuser.getUid(req) """ cmt_id = wash_url_argument(cmt_id, 'int') client_ip_address = wash_url_argument(client_ip_address, 'str') uid = wash_url_argument(uid, 'int') query = """SELECT id_cmtRECORDCOMMENT FROM cmtACTIONHISTORY WHERE id_cmtRECORDCOMMENT=%s""" params = (cmt_id,) if uid < 0: query += " AND client_host=inet_aton(%s)" params += (client_ip_address,) else: query += " AND id_user=%s" params += (uid, ) res = run_sql(query, params) return (len(res) == 0) def get_comment_collection(cmt_id): """ Extract the collection where the comment is written """ query = "SELECT id_bibrec FROM cmtRECORDCOMMENT WHERE id=%s" recid = run_sql(query, (cmt_id,)) record_primary_collection = guess_primary_collection_of_a_record(recid[0][0]) return record_primary_collection def get_collection_moderators(collection): """ Return the list of comment moderators for the given collection. """ from invenio.access_control_engine import acc_get_authorized_emails res = list(acc_get_authorized_emails('moderatecomments', collection=collection)) if not res: return [CFG_WEBCOMMENT_DEFAULT_MODERATOR,] return res def perform_request_report(cmt_id, client_ip_address, uid=-1): """ Report a comment/review for inappropriate content. Will send an email to the administrator if number of reports is a multiple of CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN @param cmt_id: comment id @return: integer 1 if successful, integer 0 if not.
-2 if comment does not exist """ cmt_id = wash_url_argument(cmt_id, 'int') if cmt_id <= 0: return 0 (query_res, nb_abuse_reports) = query_record_report_this(cmt_id) if query_res == 0: return 0 elif query_res == -2: return -2 if not(check_user_can_report(cmt_id, client_ip_address, uid)): return 0 action_date = convert_datestruct_to_datetext(time.localtime()) action_code = CFG_WEBCOMMENT_ACTION_CODE['REPORT_ABUSE'] query = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT, id_bibrec, id_user, client_host, action_time, action_code) VALUES (%s, NULL, %s, inet_aton(%s), %s, %s)""" params = (cmt_id, uid, client_ip_address, action_date, action_code) run_sql(query, params) if nb_abuse_reports % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN == 0: (cmt_id2, id_bibrec, id_user, cmt_body, cmt_date, cmt_star, cmt_vote, cmt_nb_votes_total, cmt_title, cmt_reported, round_name, restriction) = query_get_comment(cmt_id) (user_nb_abuse_reports, user_votes, user_nb_votes_total) = query_get_user_reports_and_votes(int(id_user)) (nickname, user_email, last_login) = query_get_user_contact_info(id_user) from_addr = '%s Alert Engine <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL) comment_collection = get_comment_collection(cmt_id) to_addrs = get_collection_moderators(comment_collection) subject = "A comment has been reported as inappropriate by a user" body = ''' The following comment has been reported a total of %(cmt_reported)s times. Author: nickname = %(nickname)s email = %(user_email)s user_id = %(uid)s This user has: total number of reports = %(user_nb_abuse_reports)s %(votes)s Comment: comment_id = %(cmt_id)s record_id = %(id_bibrec)s date written = %(cmt_date)s nb reports = %(cmt_reported)s %(review_stuff)s body = ---start body--- %(cmt_body)s ---end body--- Please go to the record page %(comment_admin_link)s to delete this message if necessary. A warning will be sent to the user in question.''' % \ { 'cfg-report_max' : CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN, 'nickname' : nickname, 'user_email' : user_email, 'uid' : id_user, 'user_nb_abuse_reports' : user_nb_abuse_reports, 'user_votes' : user_votes, 'votes' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \ "total number of positive votes\t= %s\n\t\ttotal number of negative votes\t= %s" % \ (user_votes, (user_nb_votes_total - user_votes)) or "\n", 'cmt_id' : cmt_id, 'id_bibrec' : id_bibrec, 'cmt_date' : cmt_date, 'cmt_reported' : cmt_reported, 'review_stuff' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \ "star score\t= %s\n\treview title\t= %s" % (cmt_star, cmt_title) or "", 'cmt_body' : cmt_body, 'comment_admin_link' : CFG_SITE_URL + "/"+ CFG_SITE_RECORD +"/" + str(id_bibrec) + '/comments#' + str(cmt_id), 'user_admin_link' : "user_admin_link" #! FIXME } #FIXME to be added to email when websession module is over: #If you wish to ban the user, you can do so via the User Admin Panel %(user_admin_link)s. 
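    # Illustrative arithmetic (added note; 5 is an assumed value for
    # CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN, the real default
    # lives in the Invenio configuration): the modulo test above makes this
    # notification fire on every 5th report of the same comment, e.g.
    #   nb_abuse_reports = 10  -> 10 % 5 == 0 -> moderators are mailed
    #   nb_abuse_reports = 7   ->  7 % 5 == 2 -> no mail is sent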
send_email(from_addr, to_addrs, subject, body) return 1 def check_user_can_report(cmt_id, client_ip_address, uid=-1): """ Checks if a user hasn't already reported a comment @param cmt_id: comment id @param client_ip_address: IP => use: str(req.remote_ip) @param uid: user id, as given by invenio.webuser.getUid(req) """ cmt_id = wash_url_argument(cmt_id, 'int') client_ip_address = wash_url_argument(client_ip_address, 'str') uid = wash_url_argument(uid, 'int') query = """SELECT id_cmtRECORDCOMMENT FROM cmtACTIONHISTORY WHERE id_cmtRECORDCOMMENT=%s""" params = (cmt_id,) if uid < 0: query += " AND client_host=inet_aton(%s)" params += (client_ip_address,) else: query += " AND id_user=%s" params += (uid,) res = run_sql(query, params) return (len(res) == 0) def query_get_user_contact_info(uid): """ Get the user contact information @return: tuple (nickname, email, last_login), if none found return () Note: for the moment, if no nickname, will return email address up to the '@' """ query1 = """SELECT nickname, email, DATE_FORMAT(last_login, '%%Y-%%m-%%d %%H:%%i:%%s') FROM user WHERE id=%s""" params1 = (uid,) res1 = run_sql(query1, params1) if res1: return res1[0] else: return () def query_get_user_reports_and_votes(uid): """ Retrieve total number of reports and votes of a particular user @param uid: user id @return: tuple (total_nb_reports, total_nb_votes_yes, total_nb_votes_total) if none found return () """ query1 = """SELECT nb_votes_yes, nb_votes_total, nb_abuse_reports FROM cmtRECORDCOMMENT WHERE id_user=%s""" params1 = (uid,) res1 = run_sql(query1, params1) if len(res1) == 0: return () nb_votes_yes = nb_votes_total = nb_abuse_reports = 0 for cmt_tuple in res1: nb_votes_yes += int(cmt_tuple[0]) nb_votes_total += int(cmt_tuple[1]) nb_abuse_reports += int(cmt_tuple[2]) return (nb_abuse_reports, nb_votes_yes, nb_votes_total) def query_get_comment(comID): """ Get all fields of a comment @param comID: comment id @return: tuple (comID, id_bibrec, id_user, body, date_creation, star_score, nb_votes_yes, nb_votes_total, title, nb_abuse_reports, round_name, restriction) if none found return () """ query1 = """SELECT id, id_bibrec, id_user, body, DATE_FORMAT(date_creation, '%%Y-%%m-%%d %%H:%%i:%%s'), star_score, nb_votes_yes, nb_votes_total, title, nb_abuse_reports, round_name, restriction FROM cmtRECORDCOMMENT WHERE id=%s""" params1 = (comID,) res1 = run_sql(query1, params1) if len(res1)>0: return res1[0] else: return () def query_record_report_this(comID): """ Increment the number of reports for a comment @param comID: comment id @return: tuple (success, new_total_nb_reports_for_this_comment) where success is integer 1 if success, integer 0 if not, -2 if comment does not exist """ #retrieve nb_abuse_reports query1 = "SELECT nb_abuse_reports FROM cmtRECORDCOMMENT WHERE id=%s" params1 = (comID,) res1 = run_sql(query1, params1) if len(res1) == 0: return (-2, 0) #increment and update nb_abuse_reports = int(res1[0][0]) + 1 query2 = "UPDATE cmtRECORDCOMMENT SET nb_abuse_reports=%s WHERE id=%s" params2 = (nb_abuse_reports, comID) res2 = run_sql(query2, params2) return (int(res2), nb_abuse_reports) def query_record_useful_review(comID, value): """ private function Adjust the number of useful votes and number of total votes for a comment.
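(Illustrative example, not part of the original docstring; values are made up: query_record_useful_review(123, +1) increments both nb_votes_yes and nb_votes_total of comment 123, while query_record_useful_review(123, -1) increments nb_votes_total only.)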
@param comID: comment id @param value: +1 or -1 @return: integer 1 if successful, integer 0 if not """ # retrieve nb_useful votes query1 = "SELECT nb_votes_total, nb_votes_yes FROM cmtRECORDCOMMENT WHERE id=%s" params1 = (comID,) res1 = run_sql(query1, params1) if len(res1)==0: return 0 # modify and insert new nb_useful votes nb_votes_yes = int(res1[0][1]) if value >= 1: nb_votes_yes = int(res1[0][1]) + 1 nb_votes_total = int(res1[0][0]) + 1 query2 = "UPDATE cmtRECORDCOMMENT SET nb_votes_total=%s, nb_votes_yes=%s WHERE id=%s" params2 = (nb_votes_total, nb_votes_yes, comID) res2 = run_sql(query2, params2) return int(res2) def query_retrieve_comments_or_remarks(recID, display_order='od', display_since='0000-00-00 00:00:00', ranking=0, limit='all', user_info=None): """ Private function Retrieve tuple of comments or remarks from the database @param recID: record id @param display_order: hh = highest helpful score lh = lowest helpful score hs = highest star score ls = lowest star score od = oldest date nd = newest date @param display_since: datetime, e.g. 0000-00-00 00:00:00 @param ranking: boolean, enabled if reviews, disabled for comments @param limit: number of comments/review to return @return: tuple of comment where comment is tuple (nickname, uid, date_creation, body, status, id) if ranking disabled or tuple (nickname, uid, date_creation, body, status, nb_votes_yes, nb_votes_total, star_score, title, id) Note: for the moment, if no nickname, will return email address up to '@' """ display_since = calculate_start_date(display_since) order_dict = { 'hh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) DESC, cmt.date_creation DESC ", 'lh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) ASC, cmt.date_creation ASC ", 'ls' : "cmt.star_score ASC, cmt.date_creation DESC ", 'hs' : "cmt.star_score DESC, cmt.date_creation DESC ", 'nd' : "cmt.reply_order_cached_data DESC ", 'od' : "cmt.reply_order_cached_data ASC " } # Ranking only done for reviews and when allowed if ranking and recID > 0: try: display_order = order_dict[display_order] except: display_order = order_dict['od'] else: # in case of recID < 0 => external record => no ranking!
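# (Illustrative note, added: in this branch a review-style ordering such as
# display_order='hh' does not end in 'd' and therefore falls back to the
# 'od' key below, while 'nd' and 'od' keep their date-based ordering.)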
ranking = 0 try: if display_order[-1] == 'd': display_order = order_dict[display_order] else: display_order = order_dict['od'] except: display_order = order_dict['od'] #display_order = order_dict['nd'] query = """SELECT user.nickname, cmt.id_user, DATE_FORMAT(cmt.date_creation, '%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s'), cmt.body, cmt.status, cmt.nb_abuse_reports, %(ranking)s cmt.id, cmt.round_name, cmt.restriction, %(reply_to_column)s FROM cmtRECORDCOMMENT cmt LEFT JOIN user ON user.id=cmt.id_user WHERE cmt.id_bibrec=%%s %(ranking_only)s %(display_since)s ORDER BY %(display_order)s """ % {'ranking' : ranking and ' cmt.nb_votes_yes, cmt.nb_votes_total, cmt.star_score, cmt.title, ' or '', 'ranking_only' : ranking and ' AND cmt.star_score>0 ' or ' AND cmt.star_score=0 ', # 'id_bibrec' : recID > 0 and 'cmt.id_bibrec' or 'cmt.id_bibrec_or_bskEXTREC', # 'table' : recID > 0 and 'cmtRECORDCOMMENT' or 'bskRECORDCOMMENT', 'display_since' : display_since == '0000-00-00 00:00:00' and ' ' or 'AND cmt.date_creation>=\'%s\' ' % display_since, 'display_order': display_order, 'reply_to_column': recID > 0 and 'cmt.in_reply_to_id_cmtRECORDCOMMENT' or 'cmt.in_reply_to_id_bskRECORDCOMMENT'} params = (recID,) res = run_sql(query, params) # return res new_limit = str(limit).isdigit() and int(limit) or limit comments_list = [] for row in res: if ranking: # when dealing with reviews, row[12] holds restriction info: restriction = row[12] else: # when dealing with comments, row[8] holds restriction info: restriction = row[8] if user_info and check_user_can_view_comment(user_info, None, restriction)[0] != 0: # User cannot view comment. Look further continue comments_list.append(row) if str(limit).isdigit(): new_limit -= 1 if new_limit < 1: break if comments_list: if str(limit).isdigit(): return comments_list[:int(limit)] else: return comments_list return () ## def get_comment_children(comID): ## """ ## Returns the list of children (i.e. direct descendants) ordered by time of addition. ## @param comID: the ID of the comment for which we want to retrieve children ## @type comID: int ## @return the list of children ## @rtype: list ## """ ## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,)) ## return [row[0] for row in res] ## def get_comment_descendants(comID, depth=None): ## """ ## Returns the list of descendants of the given comment, orderd from ## oldest to newest ("top-down"), down to depth specified as parameter. ## @param comID: the ID of the comment for which we want to retrieve descendant ## @type comID: int ## @param depth: the max depth down to which we want to retrieve ## descendants. Specify None for no limit, 1 for direct ## children only, etc.
## @return the list of descendants ## @rtype: list(tuple(comment ID, descendants comments IDs)) ## """ ## if depth == 0: ## return (comID, []) ## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,)) ## if res: ## children_comID = [row[0] for row in res] ## children_descendants = [] ## if depth: ## depth -= 1 ## children_descendants = [get_comment_descendants(child_comID, depth) for child_comID in children_comID] ## return (comID, children_descendants) ## else: ## return (comID, []) def get_comment_ancestors(comID, depth=None): """ Returns the list of ancestors of the given comment, ordered from oldest to newest ("top-down": direct parent of comID is at last position), up to given depth @param comID: the ID of the comment for which we want to retrieve ancestors @type comID: int @param depth: the maximum of levels up from the given comment we want to retrieve ancestors. None for no limit, 1 for direct parent only, etc. @type depth: int @return the list of ancestors @rtype: list """ if depth == 0: return [] res = run_sql("SELECT in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT WHERE id=%s", (comID,)) if res: parent_comID = res[0][0] if parent_comID == 0: return [] parent_ancestors = [] if depth: depth -= 1 parent_ancestors = get_comment_ancestors(parent_comID, depth) parent_ancestors.append(parent_comID) return parent_ancestors else: return [] def get_reply_order_cache_data(comid): """ Prepare a representation of the comment ID given as parameter so that it is suitable for byte ordering in MySQL. """ return "%s%s%s%s" % (chr((comid >> 24) % 256), chr((comid >> 16) % 256), chr((comid >> 8) % 256), chr(comid % 256)) def query_add_comment_or_remark(reviews=0, recID=0, uid=-1, msg="", note="", score=0, priority=0, client_ip_address='', editor_type='textarea', req=None, reply_to=None, attached_files=None): """ Private function Insert a comment/review or remark into the database @param recID: record id @param uid: user id @param msg: comment body @param note: comment title @param score: review star score @param priority: remark priority #!FIXME @param editor_type: the kind of editor used to submit the comment: 'textarea', 'fckeditor' @param req: request object. If provided, email notifications are sent after we reply to user request. @param reply_to: the id of the comment we are replying to with this inserted comment.
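(Illustrative note, added: with CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1, a reply to a comment that already sits at depth 1 is silently re-attached to that comment's top-level ancestor, as implemented below.)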
@return: integer >0 representing id if successful, integer 0 if not """ current_date = calculate_start_date('0d') #change utf-8 message into general unicode msg = msg.decode('utf-8') note = note.decode('utf-8') #change general unicode back to utf-8 msg = msg.encode('utf-8') note = note.encode('utf-8') (restriction, round_name) = get_record_status(recID) if attached_files is None: attached_files = {} if reply_to and CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH >= 0: # Check that we have not reached max depth comment_ancestors = get_comment_ancestors(reply_to) if len(comment_ancestors) >= CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH: if CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH == 0: reply_to = None else: reply_to = comment_ancestors[CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH - 1] # Inherit restriction and group/round of 'parent' comment = query_get_comment(reply_to) if comment: (round_name, restriction) = comment[10:12] if editor_type == 'fckeditor': # Here we remove the line feeds introduced by FCKeditor (they # have no meaning for the user) and replace the HTML line # breaks by linefeeds, so that we are close to an input that # would be done without the FCKeditor. That's much better if a # reply to a comment is made with a browser that does not # support FCKeditor. msg = msg.replace('\n', '').replace('\r', '') msg = re.sub('<br .*?(/>)', '\n', msg) msg = msg.replace('&nbsp;', ' ') # We clean the quotes that could have been introduced by # FCKeditor when clicking the 'quote' button, as well as those # that we have introduced when quoting the original message msg = re.sub('<blockquote.*?>\s*<(p|div).*?>', '&gt;&gt;', msg) msg = re.sub('</(p|div)>\s*</blockquote>', '', msg) # Then definitely remove any blockquote, whatever it is msg = re.sub('<blockquote.*?>', '<div>', msg) msg = re.sub('</blockquote>', '</div>', msg) # Tidy up the HTML msg = tidy_html(msg) # In case additional <p> or <div> got inserted, interpret # these as new lines (with a sad trick to do it only once) msg = msg.replace('</div><', '</div>\n<') msg = msg.replace('</p><', '</p>\n<') query = """INSERT INTO cmtRECORDCOMMENT (id_bibrec, id_user, body, date_creation, star_score, nb_votes_total, title, round_name, restriction, in_reply_to_id_cmtRECORDCOMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""" params = (recID, uid, msg, current_date, score, 0, note, round_name, restriction, reply_to or 0) res = run_sql(query, params) if res: new_comid = int(res) move_attached_files_to_storage(attached_files, recID, new_comid) parent_reply_order = run_sql("""SELECT reply_order_cached_data from cmtRECORDCOMMENT where id=%s""", (reply_to,)) if not parent_reply_order or parent_reply_order[0][0] is None: # This is not a reply, but a first 0-level comment parent_reply_order = '' else: parent_reply_order = parent_reply_order[0][0] run_sql("""UPDATE cmtRECORDCOMMENT SET reply_order_cached_data=%s WHERE id=%s""", (parent_reply_order + get_reply_order_cache_data(new_comid), new_comid)) action_code = CFG_WEBCOMMENT_ACTION_CODE[reviews and 'ADD_REVIEW' or 'ADD_COMMENT'] action_time = convert_datestruct_to_datetext(time.localtime()) query2 = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT, id_bibrec, id_user, client_host, action_time, action_code) VALUES (%s, %s, %s, inet_aton(%s), %s, %s)""" params2 = (res, recID, uid, client_ip_address, action_time, action_code) run_sql(query2, params2) def notify_subscribers_callback(data): """ Define a callback that retrieves subscribed users, and notify them by email. @param data: contains the necessary parameters in a tuple: (recid, uid, comid, msg, note, score, editor_type, reviews) """ recid, uid, comid, msg, note, score, editor_type, reviews = data # Email this comment to 'subscribers' (subscribers_emails1, subscribers_emails2) = \ get_users_subscribed_to_discussion(recid) email_subscribers_about_new_comment(recid, reviews=reviews, emails1=subscribers_emails1, emails2=subscribers_emails2, comID=comid, msg=msg, note=note, score=score, editor_type=editor_type, uid=uid) # Register our callback to notify subscribed people after # having replied to our current user. data = (recID, uid, res, msg, note, score, editor_type, reviews) if req: req.register_cleanup(notify_subscribers_callback, data) else: notify_subscribers_callback(data) return int(res) def move_attached_files_to_storage(attached_files, recID, comid): """ Move the files that were just attached to a new comment to their final location.
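For example (illustrative values, not from the original docstring): attached_files = {'plot.png': '/tmp/attachXYZ/plot.png'} with recID=42 and comid=7 ends up as CFG_PREFIX/var/data/comments/42/7/plot.png, as implemented with os.renames() below.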
@param attached_files: the mappings of desired filename to attach and path where to find the original file @type attached_files: dict {filename, filepath} @param recID: the record ID to which we attach the files @param comid: the comment ID to which we attach the files """ for filename, filepath in attached_files.iteritems(): os.renames(filepath, os.path.join(CFG_PREFIX, 'var', 'data', 'comments', str(recID), str(comid), filename)) def get_attached_files(recid, comid): """ Returns a list with tuples (filename, filepath, fileurl) @param recid: the recid to which the comment belongs @param comid: the comment id for which we want to retrieve files """ base_dir = os.path.join(CFG_PREFIX, 'var', 'data', 'comments', str(recid), str(comid)) if os.path.isdir(base_dir): filenames = os.listdir(base_dir) return [(filename, os.path.join(CFG_PREFIX, 'var', 'data', 'comments', str(recid), str(comid), filename), CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(recid) + '/comments/attachments/get/' + str(comid) + '/' + filename) \ for filename in filenames] else: return [] def subscribe_user_to_discussion(recID, uid): """ Subscribe a user to a discussion, so that she receives by email all new comments for this record. @param recID: record ID corresponding to the discussion we want to subscribe the user @param uid: user id """ query = """INSERT INTO cmtSUBSCRIPTION (id_bibrec, id_user, creation_time) VALUES (%s, %s, %s)""" params = (recID, uid, convert_datestruct_to_datetext(time.localtime())) try: run_sql(query, params) except: return 0 return 1 def unsubscribe_user_from_discussion(recID, uid): """ Unsubscribe users from a discussion. @param recID: record ID corresponding to the discussion we want to unsubscribe the user @param uid: user id @return 1 if successful, 0 if not """ query = """DELETE FROM cmtSUBSCRIPTION WHERE id_bibrec=%s AND id_user=%s""" params = (recID, uid) try: res = run_sql(query, params) except: return 0 if res > 0: return 1 return 0 def get_user_subscription_to_discussion(recID, uid): """ Returns the type of subscription for the given user to this discussion. This does not check authorizations (for eg. if user was subscribed, but is suddenly no longer authorized). @param recID: record ID @param uid: user id @return: - 0 if user is not subscribed to discussion - 1 if user is subscribed, and is allowed to unsubscribe - 2 if user is subscribed, but cannot unsubscribe """ user_email = get_email(uid) (emails1, emails2) = get_users_subscribed_to_discussion(recID, check_authorizations=False) if user_email in emails1: return 1 elif user_email in emails2: return 2 else: return 0 def get_users_subscribed_to_discussion(recID, check_authorizations=True): """ Returns the lists of users subscribed to a given discussion. Two lists are returned: the first one is the list of emails for users who can unsubscribe from the discussion, the second list contains the emails of users who cannot unsubscribe (for eg. author of the document, etc). Users appear in only one list. If a user has manually subscribed to a discussion AND is an automatic recipient for updates, they will only appear in the second list.
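(Illustrative return value with made-up addresses: (['reader@example.org'], ['author@example.org']), where only the first list of users may unsubscribe; the second list is derived from the record metadata via CFG_WEBCOMMENT_EMAIL_REPLIES_TO.)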
@param recID: record ID for which we want to retrieve subscribed users @param check_authorizations: if True, check again if users are authorized to view comment @return tuple (emails1, emails2) """ subscribers_emails = {} # Get users that have subscribed to this discussion query = """SELECT id_user FROM cmtSUBSCRIPTION WHERE id_bibrec=%s""" params = (recID,) res = run_sql(query, params) for row in res: uid = row[0] if check_authorizations: user_info = collect_user_info(uid) (auth_code, auth_msg) = check_user_can_view_comments(user_info, recID) else: # Don't check and grant access auth_code = False if auth_code: # User is no longer authorized to view comments. # Delete subscription unsubscribe_user_from_discussion(recID, uid) else: email = get_email(uid) if '@' in email: subscribers_emails[email] = True # Get users automatically subscribed, based on the record metadata collections_with_auto_replies = CFG_WEBCOMMENT_EMAIL_REPLIES_TO.keys() for collection in collections_with_auto_replies: if (get_colID(collection) is not None) and \ (recID in get_collection_reclist(collection)): fields = CFG_WEBCOMMENT_EMAIL_REPLIES_TO[collection] for field in fields: emails = get_fieldvalues(recID, field) for email in emails: if not '@' in email: # Is a group: add domain name subscribers_emails[email + '@' + \ CFG_SITE_SUPPORT_EMAIL.split('@')[1]] = False else: subscribers_emails[email] = False return ([email for email, can_unsubscribe_p \ in subscribers_emails.iteritems() if can_unsubscribe_p], [email for email, can_unsubscribe_p \ in subscribers_emails.iteritems() if not can_unsubscribe_p] ) def email_subscribers_about_new_comment(recID, reviews, emails1, emails2, comID, msg="", note="", score=0, editor_type='textarea', ln=CFG_SITE_LANG, uid=-1): """ Notify subscribers that a new comment was posted. FIXME: consider recipient preference to send email in correct language. @param recID: record id @param emails1: list of emails for users who can unsubscribe from discussion @param emails2: list of emails for users who cannot unsubscribe from discussion @param comID: the comment id @param msg: comment body @param note: comment title @param score: review star score @param editor_type: the kind of editor used to submit the comment: 'textarea', 'fckeditor' @rtype: bool @return: True if email was sent okay, False if it was not. 
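Example (an illustrative sketch, values made up): email_subscribers_about_new_comment(42, reviews=0, emails1=['reader@example.org'], emails2=[], comID=7, msg='Nice paper!') sends a single notification whose subject is built from the record title (245__a, falling back to 111__a) and whose footer offers an unsubscribe link to the emails1 recipients.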
""" _ = gettext_set_language(ln) if not emails1 and not emails2: return 0 # Get title titles = get_fieldvalues(recID, "245__a") if not titles: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") title = '' if titles: title = titles[0] else: title = _("Record %i") % recID # Get report number report_numbers = get_fieldvalues(recID, "037__a") if not report_numbers: report_numbers = get_fieldvalues(recID, "088__a") if not report_numbers: report_numbers = get_fieldvalues(recID, "021__a") # Prepare email subject and body if reviews: email_subject = _('%(report_number)s"%(title)s" has been reviewed') % \ {'report_number': report_numbers and ('[' + report_numbers[0] + '] ') or '', 'title': title} else: email_subject = _('%(report_number)s"%(title)s" has been commented') % \ {'report_number': report_numbers and ('[' + report_numbers[0] + '] ') or '', 'title': title} washer = EmailWasher() msg = washer.wash(msg) msg = msg.replace('>>', '>') email_content = msg if note: email_content = note + email_content # Send emails to people who can unsubscribe email_header = webcomment_templates.tmpl_email_new_comment_header(recID, title, reviews, comID, report_numbers, can_unsubscribe=True, ln=ln, uid=uid) email_footer = webcomment_templates.tmpl_email_new_comment_footer(recID, title, reviews, comID, report_numbers, can_unsubscribe=True, ln=ln) res1 = True if emails1: res1 = send_email(fromaddr=CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL, toaddr=emails1, subject=email_subject, content=email_content, header=email_header, footer=email_footer, ln=ln) # Then send email to people who have been automatically # subscribed to the discussion (they cannot unsubscribe) email_header = webcomment_templates.tmpl_email_new_comment_header(recID, title, reviews, comID, report_numbers, can_unsubscribe=False, ln=ln, uid=uid) email_footer = webcomment_templates.tmpl_email_new_comment_footer(recID, title, reviews, comID, report_numbers, can_unsubscribe=False, ln=ln) res2 = True if emails2: res2 = send_email(fromaddr=CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL, toaddr=emails2, subject=email_subject, content=email_content, header=email_header, footer=email_footer, ln=ln) return res1 and res2 def get_record_status(recid): """ Returns the current status of the record, i.e. current restriction to apply for newly submitted comments, and current commenting round. The restriction to apply can be found in the record metadata, in field(s) defined by config CFG_WEBCOMMENT_RESTRICTION_DATAFIELD. The restriction is empty string "" in cases where the restriction has not explicitely been set, even if the record itself is restricted. 
@param recid: the record id @type recid: int @return tuple(restriction, round_name), where 'restriction' is empty string when no restriction applies @rtype (string, int) """ collections_with_rounds = CFG_WEBCOMMENT_ROUND_DATAFIELD.keys() commenting_round = "" for collection in collections_with_rounds: # Find the first collection that defines a rounds field for this # record if get_colID(collection) is not None and \ (recid in get_collection_reclist(collection)): commenting_rounds = get_fieldvalues(recid, CFG_WEBCOMMENT_ROUND_DATAFIELD.get(collection, "")) if commenting_rounds: commenting_round = commenting_rounds[0] break collections_with_restrictions = CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.keys() restriction = "" for collection in collections_with_restrictions: # Find the first collection that defines restriction field for # this record if get_colID(collection) is not None and \ recid in get_collection_reclist(collection): restrictions = get_fieldvalues(recid, CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.get(collection, "")) if restrictions: restriction = restrictions[0] break return (restriction, commenting_round) def calculate_start_date(display_since): """ Private function Returns the datetime of display_since argument in MYSQL datetime format calculated according to the local time. @param display_since: = all= no filtering nd = n days ago nw = n weeks ago nm = n months ago ny = n years ago where n is a single digit number @return: string of wanted datetime. If 'all' given as argument, will return datetext_default datetext_default is defined in miscutils/lib/dateutils and equals 0000-00-00 00:00:00 => MySQL format If bad argument given, will return datetext_default If library 'dateutil' is not found return datetext_default and register exception. """ time_types = {'d':0, 'w':0, 'm':0, 'y':0} today = datetime.today() try: nb = int(display_since[:-1]) except: return datetext_default if display_since in [None, 'all']: return datetext_default if str(display_since[-1]) in time_types: time_type = str(display_since[-1]) else: return datetext_default # year if time_type == 'y': if (int(display_since[:-1]) > today.year - 1) or (int(display_since[:-1]) < 1): # 1 < nb years < 2008 return datetext_default else: final_nb_year = today.year - nb yesterday = today.replace(year=final_nb_year) # month elif time_type == 'm': try: from dateutil.relativedelta import relativedelta except ImportError: # The dateutil library is only recommended: if not # available, then send warning about this. register_exception(alert_admin=True) return datetext_default # obtain only the date: yyyy-mm-dd date_today = datetime.now().date() final_date = date_today - relativedelta(months=nb) yesterday = today.replace(year=final_date.year, month=final_date.month, day=final_date.day) # week elif time_type == 'w': delta = timedelta(weeks=nb) yesterday = today - delta # day elif time_type == 'd': delta = timedelta(days=nb) yesterday = today - delta return yesterday.strftime("%Y-%m-%d %H:%M:%S") def count_comments(recID): """ Returns the number of comments made on a record. """ recID = int(recID) query = """SELECT count(id) FROM cmtRECORDCOMMENT WHERE id_bibrec=%s AND star_score=0""" return run_sql(query, (recID,))[0][0] def count_reviews(recID): """ Returns the number of reviews made on a record.
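E.g. (illustrative): count_reviews(42) counts the rows of cmtRECORDCOMMENT having id_bibrec=42 and star_score>0, so a record carrying only plain comments yields 0.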
""" recID = int(recID) query = """SELECT count(id) FROM cmtRECORDCOMMENT WHERE id_bibrec=%s AND star_score>0""" return run_sql(query, (recID,))[0][0] def get_first_comments_or_remarks(recID=-1, ln=CFG_SITE_LANG, nb_comments='all', nb_reviews='all', voted=-1, reported=-1, user_info=None): """ Gets nb number comments/reviews or remarks. In the case of comments, will get both comments and reviews Comments and remarks sorted by most recent date, reviews sorted by highest helpful score @param recID: record id @param ln: language @param nb_comments: number of comment or remarks to get @param nb_reviews: number of reviews or remarks to get @param voted: 1 if user has voted for a remark @param reported: 1 if user has reported a comment or review @return: if comment, tuple (comments, reviews) both being html of first nb comments/reviews if remark, tuple (remakrs, None) """ _ = gettext_set_language(ln) warnings = [] voted = wash_url_argument(voted, 'int') reported = wash_url_argument(reported, 'int') ## check recID argument if type(recID) is not int: return () if recID >= 1: #comment or review. NB: suppressed reference to basket (handled in webbasket) if CFG_WEBCOMMENT_ALLOW_REVIEWS: res_reviews = query_retrieve_comments_or_remarks(recID=recID, display_order="hh", ranking=1, limit=nb_comments, user_info=user_info) nb_res_reviews = len(res_reviews) ## check nb argument if type(nb_reviews) is int and nb_reviews < len(res_reviews): first_res_reviews = res_reviews[:nb_reviews] else: first_res_reviews = res_reviews if CFG_WEBCOMMENT_ALLOW_COMMENTS: res_comments = query_retrieve_comments_or_remarks(recID=recID, display_order="od", ranking=0, limit=nb_reviews, user_info=user_info) nb_res_comments = len(res_comments) ## check nb argument if type(nb_comments) is int and nb_comments < len(res_comments): first_res_comments = res_comments[:nb_comments] else: first_res_comments = res_comments else: #error try: raise InvenioWebCommentError(_('%s is an invalid record ID') % recID) except InvenioWebCommentError, exc: register_exception() body = webcomment_templates.tmpl_error(exc.message, ln) return body #errors.append(('ERR_WEBCOMMENT_RECID_INVALID', recID)) #!FIXME dont return error anywhere since search page # comment if recID >= 1: comments = reviews = "" if reported > 0: try: raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning') warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',)) elif reported == 0: try: raise InvenioWebCommentWarning(_('Your feedback could not be recorded, please try again.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning') warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',)) if CFG_WEBCOMMENT_ALLOW_COMMENTS: # normal comments grouped_comments = group_comments_by_round(first_res_comments, ranking=0) comments = webcomment_templates.tmpl_get_first_comments_without_ranking(recID, ln, grouped_comments, nb_res_comments, warnings) if CFG_WEBCOMMENT_ALLOW_REVIEWS: # ranked comments #calculate average score avg_score = calculate_avg_score(res_reviews) if voted > 0: try: raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning') warnings.append((exc.message, 'green')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',)) elif voted == 0: try: raise 
InvenioWebCommentWarning(_('Your feedback could not be recorded, please try again.')) except InvenioWebCommentWarning, exc: register_exception(stream='warning') warnings.append((exc.message, '')) #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',)) grouped_reviews = group_comments_by_round(first_res_reviews, ranking=1) reviews = webcomment_templates.tmpl_get_first_comments_with_ranking(recID, ln, grouped_reviews, nb_res_reviews, avg_score, warnings) return (comments, reviews) # remark else: return(webcomment_templates.tmpl_get_first_remarks(first_res_comments, ln, nb_res_comments), None) def group_comments_by_round(comments, ranking=0): """ Group comments by the round to which they belong """ comment_rounds = {} ordered_comment_round_names = [] for comment in comments: comment_round_name = ranking and comment[11] or comment[7] if not comment_rounds.has_key(comment_round_name): comment_rounds[comment_round_name] = [] ordered_comment_round_names.append(comment_round_name) comment_rounds[comment_round_name].append(comment) return [(comment_round_name, comment_rounds[comment_round_name]) \ for comment_round_name in ordered_comment_round_names] def calculate_avg_score(res): """ private function Calculate the avg score of reviews present in res @param res: tuple of tuple returned from query_retrieve_comments_or_remarks @return: a float of the average score rounded to the closest 0.5 """ c_star_score = 6 avg_score = 0.0 nb_reviews = 0 for comment in res: if comment[c_star_score] > 0: avg_score += comment[c_star_score] nb_reviews += 1 if nb_reviews == 0: return 0.0 avg_score = avg_score / nb_reviews avg_score_unit = avg_score - math.floor(avg_score) if avg_score_unit < 0.25: avg_score = math.floor(avg_score) elif avg_score_unit > 0.75: avg_score = math.floor(avg_score) + 1 else: avg_score = math.floor(avg_score) + 0.5 if avg_score > 5: avg_score = 5.0 return avg_score def perform_request_add_comment_or_remark(recID=0, uid=-1, action='DISPLAY', ln=CFG_SITE_LANG, msg=None, score=None, note=None, priority=None, reviews=0, comID=0, client_ip_address=None, editor_type='textarea', can_attach_files=False, subscribe=False, req=None, attached_files=None, warnings=None): """ Add a comment/review or remark @param recID: record id @param uid: user id @param action: 'DISPLAY' to display add form 'SUBMIT' to submit comment once form is filled 'REPLY' to reply to an existing comment @param ln: language @param msg: the body of the comment/review or remark @param score: star score of the review @param note: title of the review @param priority: priority of remark (int) @param reviews: boolean, if enabled will add a review, if disabled will add a comment @param comID: if replying, this is the comment id of the comment we are replying to @param editor_type: the kind of editor/input used for the comment: 'textarea', 'fckeditor' @param can_attach_files: if user can attach files to comments or not @param subscribe: if True, subscribe user to receive new comments by email @param req: request object.
def perform_request_add_comment_or_remark(recID=0,
                                          uid=-1,
                                          action='DISPLAY',
                                          ln=CFG_SITE_LANG,
                                          msg=None,
                                          score=None,
                                          note=None,
                                          priority=None,
                                          reviews=0,
                                          comID=0,
                                          client_ip_address=None,
                                          editor_type='textarea',
                                          can_attach_files=False,
                                          subscribe=False,
                                          req=None,
                                          attached_files=None,
                                          warnings=None):
    """
    Add a comment/review or remark
    @param recID: record id
    @param uid: user id
    @param action: 'DISPLAY' to display the add form
                   'SUBMIT' to submit the comment once the form is filled
                   'REPLY' to reply to an existing comment
    @param ln: language
    @param msg: the body of the comment/review or remark
    @param score: star score of the review
    @param note: title of the review
    @param priority: priority of remark (int)
    @param reviews: boolean, if enabled will add a review, if disabled will add a comment
    @param comID: if replying, this is the comment id of the comment we are replying to
    @param editor_type: the kind of editor/input used for the comment: 'textarea', 'fckeditor'
    @param can_attach_files: if user can attach files to comments or not
    @param subscribe: if True, subscribe user to receive new comments by email
    @param req: request object. Used to register callback to send email notification
    @param attached_files: newly attached files to this comment, mapping filename to filepath
    @type attached_files: dict
    @param warnings: list of warning tuples (warning_text, warning_color) that should be considered
    @return:
             - html add form if action is display or reply
             - html successfully-added page if action is submit
    """
    _ = gettext_set_language(ln)
    if warnings is None:
        warnings = []

    actions = ['DISPLAY', 'REPLY', 'SUBMIT']

    ## check arguments
    check_recID_is_in_range(recID, warnings, ln)
    if uid <= 0:
        try:
            raise InvenioWebCommentError(_('%s is an invalid user ID.') % uid)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message, ln)
            return body
        #errors.append(('ERR_WEBCOMMENT_UID_INVALID', uid))
        return ''

    if attached_files is None:
        attached_files = {}

    user_contact_info = query_get_user_contact_info(uid)
    nickname = ''
    if user_contact_info:
        if user_contact_info[0]:
            nickname = user_contact_info[0]

    # show the form
    if action == 'DISPLAY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files)
        else:
            try:
                raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))

    elif action == 'REPLY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            try:
                raise InvenioWebCommentError(_('Cannot reply to a review.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_REPLY_REVIEW',))
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            textual_msg = msg
            if comID > 0:
                comment = query_get_comment(comID)
                if comment:
                    user_info = get_user_info(comment[2])
                    if user_info:
                        date_creation = convert_datetext_to_dategui(str(comment[4]))
                        # Build two msg: one mostly textual, the other one with HTML markup, for the FCKeditor.
                        msg = _("%(x_name)s wrote on %(x_date)s:") % {'x_name': user_info[2], 'x_date': date_creation}
                        textual_msg = msg
                        # 1 For FCKeditor input
                        msg += '\n\n'
                        msg += comment[3]
                        msg = email_quote_txt(text=msg)
                        # Now that we have a text-quoted version, transform into
                        # something that FCKeditor likes, using <blockquote> that
                        # do still enable users to insert comments inline
                        msg = email_quoted_txt2html(text=msg,
                                                    indent_html=('<blockquote><div>', '&nbsp;&nbsp;</div></blockquote>'),
                                                    linebreak_html="&nbsp;<br/>",
                                                    indent_block=False)
                        # Add some space for users to easily add text
                        # around the quoted message
                        msg = '<br/>' + msg + '<br/>'
                        # Due to how things are done, we need to
                        # escape the whole msg again for the editor
                        msg = cgi.escape(msg)

                        # 2 For textarea input
                        textual_msg += "\n\n"
                        textual_msg += comment[3]
                        textual_msg = email_quote_txt(text=textual_msg)
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, textual_msg, can_attach_files=can_attach_files, reply_to=comID)
        else:
            try:
                raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))
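    # A rough sketch of the reply-quoting pipeline above (shapes are
    # indicative only, assuming email_quote_txt's default '>>' prefix):
    #
    #     "J. Doe wrote on ...:\n\nhello"
    #         --email_quote_txt-->       ">>J. Doe wrote on ...:\n>>\n>>hello"
    #         --email_quoted_txt2html--> "<blockquote><div>...</div></blockquote>"
    #
    # textual_msg keeps the plain '>>' form for users without the rich-text
    # editor, while msg is escaped once more for the FCKeditor widget.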
    # check before submitting form
    elif action == 'SUBMIT':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            if note.strip() in ["", "None"] and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
                try:
                    raise InvenioWebCommentWarning(_('You must enter a title.'))
                except InvenioWebCommentWarning, exc:
                    register_exception(stream='warning', req=req)
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADD_NO_TITLE',))
            if score == 0 or score > 5:
                try:
                    raise InvenioWebCommentWarning(_('You must choose a score.'))
                except InvenioWebCommentWarning, exc:
                    register_exception(stream='warning', req=req)
                    warnings.append((exc.message, ''))
                #warnings.append(("WRN_WEBCOMMENT_ADD_NO_SCORE",))
        if msg.strip() in ["", "None"] and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
            try:
                raise InvenioWebCommentWarning(_('You must enter a text.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_ADD_NO_BODY',))

        # if no warnings, submit
        if len(warnings) == 0:
            if reviews:
                if check_user_can_review(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID)
                else:
                    try:
                        raise InvenioWebCommentWarning(_('You already wrote a review for this record.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning', req=req)
                        warnings.append((exc.message, ''))
                    #warnings.append('WRN_WEBCOMMENT_CANNOT_REVIEW_TWICE')
                    success = 1
            else:
                if check_user_can_comment(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID,
                                                          attached_files=attached_files)
                    if success > 0 and subscribe:
                        subscribe_user_to_discussion(recID, uid)
                else:
                    try:
                        raise InvenioWebCommentWarning(_('You already posted a comment short ago. Please retry later.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning', req=req)
                        warnings.append((exc.message, ''))
                    #warnings.append('WRN_WEBCOMMENT_TIMELIMIT')
                    success = 1
            if success > 0:
                if CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL > 0:
                    notify_admin_of_new_comment(comID=success)
                return webcomment_templates.tmpl_add_comment_successful(recID, ln, reviews, warnings, success)
            else:
                try:
                    raise InvenioWebCommentError(_('Failed to insert your comment to the database. Please try again.'))
                except InvenioWebCommentError, exc:
                    register_exception(req=req)
                    body = webcomment_templates.tmpl_error(exc.message, ln)
                    return body
                #errors.append(('ERR_WEBCOMMENT_DB_INSERT_ERROR'))
        # if there are warnings, or if inserting the comment failed, redisplay the form with the warnings
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        else:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files)

    # unknown action: send to display
    else:
        try:
            raise InvenioWebCommentWarning(_('Unknown action --> showing you the default add comment form.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_ADD_UNKNOWN_ACTION',))
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        else:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files)

    return ''

def notify_admin_of_new_comment(comID):
    """
    Sends an email to the admin with details regarding comment with ID = comID
    """
    comment = query_get_comment(comID)
    if len(comment) > 0:
        (comID2, id_bibrec, id_user, body, date_creation,
         star_score, nb_votes_yes, nb_votes_total,
         title, nb_abuse_reports, round_name, restriction) = comment
    else:
        return

    user_info = query_get_user_contact_info(id_user)
    if len(user_info) > 0:
        (nickname, email, last_login) = user_info
        if not len(nickname) > 0:
            nickname = email.split('@')[0]
    else:
        nickname = email = last_login = "ERROR: Could not retrieve"

    review_stuff = '''
    Star score  = %s
    Title       = %s''' % (star_score, title)

    washer = EmailWasher()
    try:
        body = washer.wash(body)
    except:
        body = cgi.escape(body)

    record_info = webcomment_templates.tmpl_email_new_comment_admin(id_bibrec)
    out = '''
The following %(comment_or_review)s has just been posted (%(date)s).

AUTHOR:
    Nickname    = %(nickname)s
    Email       = %(email)s
    User ID     = %(uid)s

RECORD CONCERNED:
    Record ID   = %(recID)s
    URL         = <%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/%(comments_or_reviews)s/>
%(record_details)s

%(comment_or_review_caps)s:
    %(comment_or_review)s ID    = %(comID)s %(review_stuff)s
    Body        =
<--------------->
%(body)s
<--------------->

ADMIN OPTIONS:
To moderate the %(comment_or_review)s go to %(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/%(comments_or_reviews)s/display?%(arguments)s
''' % \
        {'comment_or_review'      : star_score > 0 and 'review' or 'comment',
         'comment_or_review_caps' : star_score > 0 and 'REVIEW' or 'COMMENT',
         'comments_or_reviews'    : star_score > 0 and 'reviews' or 'comments',
         'date'                   : date_creation,
         'nickname'               : nickname,
         'email'                  : email,
         'uid'                    : id_user,
         'recID'                  : id_bibrec,
         'record_details'         : record_info,
         'comID'                  : comID2,
         'review_stuff'           : star_score > 0 and review_stuff or "",
         'body'                   : body.replace('<br/>', '\n'),
         'siteurl'                : CFG_SITE_URL,
         'CFG_SITE_RECORD'        : CFG_SITE_RECORD,
         'arguments'              : 'ln=en&do=od#%s' % comID
         }

    from_addr = '%s WebComment <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL)
    comment_collection = get_comment_collection(comID)
    to_addrs = get_collection_moderators(comment_collection)

    rec_collection = guess_primary_collection_of_a_record(id_bibrec)
    report_nums = get_fieldvalues(id_bibrec, "037__a")
    report_nums += get_fieldvalues(id_bibrec, "088__a")
    report_nums = ', '.join(report_nums)
    subject = "A new comment/review has just been posted [%s|%s]" % (rec_collection, report_nums)

    send_email(from_addr, to_addrs, subject, out)

def check_recID_is_in_range(recID, warnings=None, ln=CFG_SITE_LANG):
    """
    Check that recID is a strictly positive integer that matches an existing record
    @param recID: record id
    @param warnings: list of warning tuples (warning_text, warning_color)
    @return: tuple (boolean, html) where boolean (1=true, 0=false)
                                  and html is the body of the page to display if there was a problem
    """
    _ = gettext_set_language(ln)
    if warnings is None:
        warnings = []

    try:
        recID = int(recID)
    except:
        pass

    if type(recID) is int:
        if recID > 0:
            from invenio.search_engine import record_exists
            success = record_exists(recID)
            if success == 1:
                return (1, "")
            else:
                try:
                    raise InvenioWebCommentWarning(_('Record ID %s does not exist in the database.') % recID)
                except InvenioWebCommentWarning, exc:
                    register_exception(stream='warning')
                    warnings.append((exc.message, ''))
                #warnings.append(('ERR_WEBCOMMENT_RECID_INEXISTANT', recID))
                return (0, webcomment_templates.tmpl_record_not_found(status='inexistant', recID=recID, ln=ln))
        elif recID == 0:
            try:
                raise InvenioWebCommentWarning(_('No record ID was given.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning')
                warnings.append((exc.message, ''))
            #warnings.append(('ERR_WEBCOMMENT_RECID_MISSING',))
            return (0, webcomment_templates.tmpl_record_not_found(status='missing', recID=recID, ln=ln))
        else:
            try:
                raise InvenioWebCommentWarning(_('Record ID %s is an invalid ID.') % recID)
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning')
                warnings.append((exc.message, ''))
            #warnings.append(('ERR_WEBCOMMENT_RECID_INVALID', recID))
            return (0, webcomment_templates.tmpl_record_not_found(status='invalid', recID=recID, ln=ln))
    else:
        try:
            raise InvenioWebCommentWarning(_('Record ID %s is not a number.') % recID)
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning')
            warnings.append((exc.message, ''))
        #warnings.append(('ERR_WEBCOMMENT_RECID_NAN', recID))
        return (0, webcomment_templates.tmpl_record_not_found(status='nan', recID=recID, ln=ln))

def check_int_arg_is_in_range(value, name, gte_value, lte_value=None):
    """
    Check that variable with name 'name' is >= gte_value and optionally <= lte_value
    @param value: variable value
    @param name: variable name
    @param gte_value: greater than or equal to value
    @param lte_value: less than or equal to value
    @return: boolean (1=true, 0=false)
    """
    if type(value) is not int:
        try:
            raise InvenioWebCommentError('%s is not a number.' % value)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message)
            return body
        #errors.append(('ERR_WEBCOMMENT_ARGUMENT_NAN', value))
        return 0

    if value < gte_value:
        try:
            raise InvenioWebCommentError('%s is an invalid argument.' % value)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message)
            return body
        #errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
        return 0
    if lte_value:
        if value > lte_value:
            try:
                raise InvenioWebCommentError('%s is an invalid argument.' % value)
            except InvenioWebCommentError, exc:
                register_exception()
                body = webcomment_templates.tmpl_error(exc.message)
                return body
            #errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
            return 0
    return 1

def get_mini_reviews(recid, ln=CFG_SITE_LANG):
    """
    Returns the web controls to add reviews to a record from the
    detailed record pages mini-panel.
    @param recid: the id of the displayed record
    @param ln: the user's language
    """
    if CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
        action = 'SUBMIT'
    else:
        action = 'DISPLAY'

    reviews = query_retrieve_comments_or_remarks(recid, ranking=1)

    return webcomment_templates.tmpl_mini_review(recid, ln, action=action,
                                                 avg_score=calculate_avg_score(reviews),
                                                 nb_comments_total=len(reviews))

def check_user_can_view_comments(user_info, recid):
    """Check if the user is authorized to view comments for given recid.
    Returns the same type as acc_authorize_action
    """
    # Check user can view the record itself first
    (auth_code, auth_msg) = check_user_can_view_record(user_info, recid)
    if auth_code:
        return (auth_code, auth_msg)

    # Check if user can view the comments
    ## But first can we find an authorization for this case action,
    ## for this collection?
    record_primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'viewcomment', authorized_if_no_roles=True, collection=record_primary_collection)

def check_user_can_view_comment(user_info, comid, restriction=None):
    """Check if the user is authorized to view a particular comment,
    given the comment restriction. Note that this function does not
    check if the record itself is restricted to the user, which would
    mean that the user should not see the comment.

    You can omit 'comid' if you already know the 'restriction'

    @param user_info: the user info object
    @param comid: the id of the comment we want to check
    @param restriction: the restriction applied to given comment (if known. Otherwise retrieved automatically)
    @return: the same type as acc_authorize_action
    """
    if restriction is None:
        comment = query_get_comment(comid)
        if comment:
            restriction = comment[11]
        else:
            return (1, 'Comment %i does not exist' % comid)
    if restriction == "":
        return (0, '')
    return acc_authorize_action(user_info, 'viewrestrcomment', status=restriction)

def check_user_can_send_comments(user_info, recid):
    """Check if the user is authorized to comment the given
    recid. This function does not check that user can view the record
    or view the comments.
    Returns the same type as acc_authorize_action
    """
    ## First can we find an authorization for this case, action + collection
    record_primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'sendcomment', authorized_if_no_roles=True, collection=record_primary_collection)
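# Minimal usage sketch (hypothetical caller code): every check_user_can_*
# helper in this module returns an acc_authorize_action-style tuple, where
# auth_code == 0 means "allowed":
#
#     (auth_code, auth_msg) = check_user_can_view_comments(user_info, recid)
#     if auth_code:
#         return page_not_authorized(req, text=auth_msg)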
def check_user_can_attach_file_to_comments(user_info, recid):
    """Check if the user is authorized to attach a file to comments
    for given recid. This function does not check that user can view
    the comments or send comments.
    Returns the same type as acc_authorize_action
    """
    ## First can we find an authorization for this case action, for
    ## this collection?
    record_primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'attachcommentfile', authorized_if_no_roles=False, collection=record_primary_collection)
diff --git a/modules/webcomment/lib/webcomment_templates.py b/modules/webcomment/lib/webcomment_templates.py
index dd9070f8c..7a023b14f 100644
--- a/modules/webcomment/lib/webcomment_templates.py
+++ b/modules/webcomment/lib/webcomment_templates.py
@@ -1,2208 +1,2208 @@
# -*- coding: utf-8 -*-
## Comments and reviews for records.

## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""HTML Templates for commenting features """

__revision__ = "$Id$"

import cgi

# Invenio imports
from invenio.urlutils import create_html_link
from invenio.webuser import get_user_info, collect_user_info, isGuestUser, get_email
from invenio.dateutils import convert_datetext_to_dategui
from invenio.webmessage_mailutils import email_quoted_txt2html
from invenio.webcomment_config import \
     CFG_WEBCOMMENT_MAX_ATTACHED_FILES, \
     CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE
from invenio.config import CFG_SITE_URL, \
                           CFG_SITE_SECURE_URL, \
                           CFG_SITE_LANG, \
                           CFG_SITE_NAME, \
                           CFG_SITE_NAME_INTL,\
                           CFG_SITE_SUPPORT_EMAIL,\
                           CFG_WEBCOMMENT_ALLOW_REVIEWS, \
                           CFG_WEBCOMMENT_ALLOW_COMMENTS, \
                           CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR, \
                           CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN, \
                           CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION, \
                           CFG_CERN_SITE, \
                           CFG_SITE_RECORD
from invenio.htmlutils import get_html_text_editor
from invenio.messages import gettext_set_language
from invenio.bibformat import format_record
from invenio.access_control_engine import acc_authorize_action
-from invenio.websearch_templates import get_fieldvalues
+from invenio.search_engine_utils import get_fieldvalues

class Template:
    """templating class, refer to webcomment.py for examples of call"""

    def tmpl_get_first_comments_without_ranking(self, recID, ln, comments, nb_comments_total, warnings):
        """
        @param recID: record id
        @param ln: language
        @param comments: tuple as returned from webcomment.py/query_retrieve_comments_or_remarks
        @param nb_comments_total: total number of comments for this record
        @param warnings: list of warning tuples (warning_text, warning_color)
        @return: html of comments
        """
        # load the right message language
        _ = gettext_set_language(ln)

        # naming data fields of comments
        c_nickname = 0
        c_user_id = 1
        c_date_creation = 2
        c_body = 3
        c_id = 4

        warnings = self.tmpl_warnings(warnings, ln)

        # comments
        comment_rows = ''
        last_comment_round_name = None
        comment_round_names = [comment[0] for comment in comments]
        if comment_round_names:
            last_comment_round_name = comment_round_names[-1]
        for comment_round_name, comments_list in comments:
            comment_rows += '<div id="cmtRound%s" class="cmtRound">' % (comment_round_name)
            comment_rows += _('%(x_nb)i comments for round "%(x_name)s"') % {'x_nb': len(comments_list), 'x_name': comment_round_name} + "<br/>"
            for comment in comments_list:
                if comment[c_nickname]:
                    nickname = comment[c_nickname]
                    display = nickname
                else:
                    (uid, nickname, display) = get_user_info(comment[c_user_id])
                messaging_link = self.create_messaging_link(nickname, display, ln)
                comment_rows += """
                    <div class="webcomment_comment_box">"""
                report_link = '%s/%s/%s/comments/report?ln=%s&comid=%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, ln, comment[c_id])
                reply_link = '%s/%s/%s/comments/add?ln=%s&comid=%s&action=REPLY' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, ln, comment[c_id])
                comment_rows += self.tmpl_get_comment_without_ranking(req=None, ln=ln, nickname=messaging_link, comment_uid=comment[c_user_id], date_creation=comment[c_date_creation], body=comment[c_body], status='', nb_reports=0, report_link=report_link, reply_link=reply_link, recID=recID)
                comment_rows += """
                    <br/>
                    </div>"""
            # Close comment round
            comment_rows += '</div>'

        # write button
        write_button_label = _("Write a comment")
        write_button_link = '%s/%s/%s/comments/add' % (CFG_SITE_URL, CFG_SITE_RECORD, recID)
        write_button_form = '<input type="hidden" name="ln" value="%s"/>' % ln
        write_button_form = self.createhiddenform(action=write_button_link, method="get", text=write_button_form, button=write_button_label)

        # output
        if nb_comments_total > 0:
            out = warnings
            comments_label = len(comments) > 1 and _("Showing the latest %i comments:") % len(comments) or ""
            out += """
<!--  comments title table -->
<table>
  <tr>
    <td class="blocknote">%(comment_title)s</td>
  </tr>
</table>
%(comments_label)s<br/>
%(comment_rows)s
%(view_all_comments_link)s
<br/>
%(write_button_form)s<br/>
""" % \
                {'comment_title'         : _("Discuss this document"),
                 'comments_label'        : comments_label,
                 'nb_comments_total'     : nb_comments_total,
                 'recID'                 : recID,
                 'comment_rows'          : comment_rows,
                 'tab'                   : '&nbsp;'*4,
                 'siteurl'               : CFG_SITE_URL,
                 's'                     : nb_comments_total > 1 and 's' or "",
                 'view_all_comments_link': nb_comments_total > 0 and
                     '''<a class="detailedRecordActions" href="%s/%s/%s/comments/display">View all %s comments</a>''' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, nb_comments_total) or "",
                 'write_button_form'     : write_button_form,
                 'nb_comments'           : len(comments)
                 }
        else:
            out = """
<!--  comments title table -->
<table>
  <tr>
    <td class="blocknote">%(discuss_label)s:</td>
  </tr>
</table>
%(detailed_info)s
<br/>
%(form)s
""" % {'form'          : write_button_form,
       'discuss_label' : _("Discuss this document"),
       'detailed_info' : _("Start a discussion about any aspect of this document.")
       }

        return out

    def tmpl_record_not_found(self, status='missing', recID="", ln=CFG_SITE_LANG):
        """
        Displays a page when bad or missing record ID was given.
        @param status:  'missing'   : no recID was given
                        'inexistant': recID doesn't have an entry in the database
                        'nan'       : recID is not a number
                        'invalid'   : recID is an error code, i.e. in the interval [-99,-1]
        @param return: body of the page
        """
        _ = gettext_set_language(ln)
        if status == 'inexistant':
            body = _("Sorry, the record %s does not seem to exist.") % (recID,)
        elif status in ('nan', 'invalid'):
            body = _("Sorry, %s is not a valid ID value.") % (recID,)
        else:
            body = _("Sorry, no record ID was provided.")

        body += "<br/><br/>"
        link = "<a href=\"%s?ln=%s\">%s</a>." % (CFG_SITE_URL, ln, CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME))
        body += _("You may want to start browsing from %s") % link
        return body
    def tmpl_get_first_comments_with_ranking(self, recID, ln, comments=None, nb_comments_total=None, avg_score=None, warnings=[]):
        """
        @param recID: record id
        @param ln: language
        @param comments: tuple as returned from webcomment.py/query_retrieve_comments_or_remarks
        @param nb_comments_total: total number of comments for this record
        @param avg_score: average score of all reviews
        @param warnings: list of warning tuples (warning_text, warning_color)
        @return: html of comments
        """
        # load the right message language
        _ = gettext_set_language(ln)

        # naming data fields of comments
        c_nickname = 0
        c_user_id = 1
        c_date_creation = 2
        c_body = 3
        c_nb_votes_yes = 4
        c_nb_votes_total = 5
        c_star_score = 6
        c_title = 7
        c_id = 8

        warnings = self.tmpl_warnings(warnings, ln)

        #stars
        if avg_score > 0:
            avg_score_img = 'stars-' + str(avg_score).split('.')[0] + '-' + str(avg_score).split('.')[1] + '.png'
        else:
            avg_score_img = "stars-0-0.png"

        # voting links
        useful_dict = {'siteurl'        : CFG_SITE_URL,
                       'CFG_SITE_RECORD': CFG_SITE_RECORD,
                       'recID'          : recID,
                       'ln'             : ln,
                       'yes_img'        : 'smchk_gr.gif', #'yes.gif',
                       'no_img'         : 'iconcross.gif' #'no.gif'
                       }
        link = '<a href="%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/reviews/vote?ln=%(ln)s&amp;comid=%%(comid)s' % useful_dict
        useful_yes = link + '&amp;com_value=1">' + _("Yes") + '</a>'
        useful_no = link + '&amp;com_value=-1">' + _("No") + '</a>'

        #comment row
        comment_rows = ' '
        last_comment_round_name = None
        comment_round_names = [comment[0] for comment in comments]
        if comment_round_names:
            last_comment_round_name = comment_round_names[-1]

        for comment_round_name, comments_list in comments:
            comment_rows += '<div id="cmtRound%s" class="cmtRound">' % (comment_round_name)
            comment_rows += _('%(x_nb)i comments for round "%(x_name)s"') % {'x_nb': len(comments_list), 'x_name': comment_round_name} + "<br/>"
            for comment in comments_list:
                if comment[c_nickname]:
                    nickname = comment[c_nickname]
                    display = nickname
                else:
                    (uid, nickname, display) = get_user_info(comment[c_user_id])
                messaging_link = self.create_messaging_link(nickname, display, ln)

                comment_rows += '''
                    <div class="webcomment_review_box">'''
                report_link = '%s/%s/%s/reviews/report?ln=%s&comid=%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, ln, comment[c_id])
                comment_rows += self.tmpl_get_comment_with_ranking(None, ln=ln, nickname=messaging_link, comment_uid=comment[c_user_id], date_creation=comment[c_date_creation], body=comment[c_body], status='', nb_reports=0, nb_votes_total=comment[c_nb_votes_total], nb_votes_yes=comment[c_nb_votes_yes], star_score=comment[c_star_score], title=comment[c_title], report_link=report_link, recID=recID)
                comment_rows += '''
                    <small>%s %s / %s</small><br/>''' % \
                    (_("Was this review helpful?"),
                     useful_yes % {'comid': comment[c_id]},
                     useful_no % {'comid': comment[c_id]})
                comment_rows += '''
                    <br/>
                    </div>'''
            # Close comment round
            comment_rows += '</div>'

        # write button
        write_button_link = '''%s/%s/%s/reviews/add''' % (CFG_SITE_URL, CFG_SITE_RECORD, recID)
        write_button_form = '<input type="hidden" name="ln" value="%s"/>' % ln
        write_button_form = self.createhiddenform(action=write_button_link, method="get", text=write_button_form, button=_("Write a review"))

        if nb_comments_total > 0:
            avg_score_img = str(avg_score_img)
            avg_score = str(avg_score)
            nb_comments_total = str(nb_comments_total)
            score = '<img src="%s/img/%s" alt="%s" />' % (CFG_SITE_URL, avg_score_img, avg_score)
            score += _("Average review score: %(x_nb_score)s based on %(x_nb_reviews)s reviews") % \
                     {'x_nb_score'  : '<b>' + avg_score + '</b>',
                      'x_nb_reviews': nb_comments_total}
            useful_label = _("Readers found the following %s reviews to be most helpful.")
            useful_label %= len(comments) > 1 and len(comments) or ""
            view_all_comments_link = '<a class="detailedRecordActions" href="%s/%s/%s/reviews/display?do=hh&amp;ln=%s">' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, ln)
            view_all_comments_link += _("View all %s reviews") % nb_comments_total
            view_all_comments_link += '</a><br/>'

            out = warnings + """
<!--  review title table -->
<table>
  <tr>
    <td class="blocknote">%(comment_title)s:</td>
  </tr>
</table>
%(score_label)s<br/>
%(useful_label)s
%(comment_rows)s
%(view_all_comments_link)s
%(write_button_form)s<br/>
""" % \
                {'comment_title'         : _("Rate this document"),
                 'score_label'           : score,
                 'useful_label'          : useful_label,
                 'recID'                 : recID,
                 'view_all_comments'     : _("View all %s reviews") % (nb_comments_total,),
                 'write_comment'         : _("Write a review"),
                 'comment_rows'          : comment_rows,
                 'tab'                   : '&nbsp;'*4,
                 'siteurl'               : CFG_SITE_URL,
                 'view_all_comments_link': nb_comments_total > 0 and view_all_comments_link or "",
                 'write_button_form'     : write_button_form
                 }
        else:
            out = '''
<!--  review title table -->
<table>
  <tr>
    <td class="blocknote">%s:</td>
  </tr>
</table>
%s<br/>
%s
''' % (_("Rate this document"),
       _("Be the first to review this document."),
       write_button_form)

        return out
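    # Star-image naming sketch (assuming the stock /img/ files shipped with
    # Invenio): an average of 3.5 maps to 'stars-3-5.png', a plain 4.0 to
    # 'stars-4-0.png', and "no reviews yet" falls back to 'stars-0-0.png'.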
    def tmpl_get_comment_without_ranking(self, req, ln, nickname, comment_uid, date_creation, body, status, nb_reports, reply_link=None, report_link=None, undelete_link=None, delete_links=None, unreport_link=None, recID=-1, com_id='', attached_files=None):
        """
        private function
        @param req: request object to fetch user info
        @param ln: language
        @param nickname: nickname
        @param date_creation: date comment was written
        @param body: comment body
        @param status: status of the comment:
            da: deleted by author
            dm: deleted by moderator
            ok: active
        @param nb_reports: number of reports the comment has
        @param reply_link: if want reply and report, give the http links
        @param report_link: if want reply and report, give the http links
        @param undelete_link: http link to undelete the message
        @param delete_links: http links to delete the message
        @param unreport_link: http link to unreport the comment
        @param recID: recID where the comment is posted
        @param com_id: ID of the comment displayed
        @param attached_files: list of attached files
        @return: html table of comment
        """
        from invenio.search_engine import guess_primary_collection_of_a_record
        # load the right message language
        _ = gettext_set_language(ln)
        date_creation = convert_datetext_to_dategui(date_creation, ln=ln)
        if attached_files is None:
            attached_files = []
        out = ''
        final_body = email_quoted_txt2html(body)
        title = _('%(x_name)s wrote on %(x_date)s:') % {'x_name': nickname, 'x_date': '<i>' + date_creation + '</i>'}
        title += '<a name="%s"></a>' % com_id
        links = ''
        moderator_links = ''
        if reply_link:
            links += '<a href="' + reply_link + '">' + _("Reply") + '</a>'
            if report_link and status != 'ap':
                links += ' | '
        if report_link and status != 'ap':
            links += '<a href="' + report_link + '">' + _("Report abuse") + '</a>'

        # Check if user is a comment moderator
        record_primary_collection = guess_primary_collection_of_a_record(recID)
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = acc_authorize_action(user_info, 'moderatecomments', collection=record_primary_collection)
        if status in ['dm', 'da'] and req:
            if not auth_code:
                if status == 'dm':
                    final_body = '<div class="webcomment_deleted">(Comment deleted by the moderator) - not visible for users<br/><br/>' + \
                                 final_body + '</div>'
                else:
                    final_body = '<div class="webcomment_deleted">(Comment deleted by the author) - not visible for users<br/><br/>' + \
                                 final_body + '</div>'
                links = ''
                moderator_links += '<a href="' + undelete_link + '">' + _("Undelete comment") + '</a>'
            else:
                if status == 'dm':
                    final_body = '<i>Comment deleted by the moderator</i>'
                else:
                    final_body = '<i>Comment deleted by the author</i>'
                links = ''
        else:
            if not auth_code:
                moderator_links += '<a href="' + delete_links['mod'] + '">' + _("Delete comment") + '</a>'
            elif (user_info['uid'] == comment_uid) and CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION:
                moderator_links += '<a href="' + delete_links['auth'] + '">' + _("Delete comment") + '</a>'
            if nb_reports >= CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN:
                if not auth_code:
                    final_body = '<div class="webcomment_reported">(Comment reported. Pending approval) - not visible for users<br/><br/>' + final_body + '</div>'
                    links = ''
                    moderator_links += ' | '
                    moderator_links += '<a href="' + unreport_link + '">' + _("Unreport comment") + '</a>'
                else:
                    final_body = '<i>This comment is pending approval due to user reports</i>'
                    links = ''

        if links and moderator_links:
            links = links + ' || ' + moderator_links
        elif not links:
            links = moderator_links

        attached_files_html = ''
        if attached_files:
            attached_files_html = '<div class="cmtfiles">%s:<br/>' % \
                                  (len(attached_files) == 1 and _("Attached file") or _("Attached files"))
            for (filename, filepath, fileurl) in attached_files:
                attached_files_html += create_html_link(urlbase=fileurl, urlargd={},
                                                        link_label=cgi.escape(filename)) + '<br/>'
            attached_files_html += '</div>'

        out += """
<div class="webcomment_comment">
  %(title)s<br/>
  %(body)s
  <br/>
  %(attached_files_html)s
  %(links)s
</div>""" % \
            {'title'              : '<img src="%s/img/user-icon-1-24x24.gif" alt="" /> %s' % (CFG_SITE_URL, title),
             'body'               : final_body,
             'links'              : links,
             'attached_files_html': attached_files_html}
        return out
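    # Status values handled by the two tmpl_get_comment_* methods, for
    # reference: 'ok' = active, 'da' = deleted by its author, 'dm' = deleted
    # by a moderator; in addition, nb_reports >=
    # CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN parks the comment
    # as "pending approval" for everybody but the moderators.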
    def tmpl_get_comment_with_ranking(self, req, ln, nickname, comment_uid, date_creation, body, status, nb_reports, nb_votes_total, nb_votes_yes, star_score, title, report_link=None, delete_links=None, undelete_link=None, unreport_link=None, recID=-1):
        """
        private function
        @param req: request object to fetch user info
        @param ln: language
        @param nickname: nickname
        @param date_creation: date comment was written
        @param body: comment body
        @param status: status of the comment
        @param nb_reports: number of reports the comment has
        @param nb_votes_total: total number of votes for this review
        @param nb_votes_yes: number of positive votes for this review
        @param star_score: star score for this review
        @param title: title of review
        @param report_link: if want reply and report, give the http links
        @param delete_links: http links to delete the message
        @param undelete_link: http link to undelete the message
        @param unreport_link: http link to unreport the comment
        @param recID: recID where the comment is posted
        @return: html table of review
        """
        from invenio.search_engine import guess_primary_collection_of_a_record
        # load the right message language
        _ = gettext_set_language(ln)

        if star_score > 0:
            star_score_img = 'stars-' + str(star_score) + '-0.png'
        else:
            star_score_img = 'stars-0-0.png'

        out = ""
        date_creation = convert_datetext_to_dategui(date_creation, ln=ln)
        reviewed_label = _("Reviewed by %(x_nickname)s on %(x_date)s") % {'x_nickname': nickname, 'x_date': date_creation}
        useful_label = _("%(x_nb_people)i out of %(x_nb_total)i people found this review useful") % {'x_nb_people': nb_votes_yes,
                                                                                                     'x_nb_total': nb_votes_total}
        links = ''
        _body = ''
        if body != '':
            _body = '''
<blockquote>
%s
</blockquote>''' % email_quoted_txt2html(body, linebreak_html='<br/>')

        # Check if user is a comment moderator
        record_primary_collection = guess_primary_collection_of_a_record(recID)
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = acc_authorize_action(user_info, 'moderatecomments', collection=record_primary_collection)
        if status in ['dm', 'da'] and req:
            if not auth_code:
                if status == 'dm':
                    _body = '<div class="webcomment_deleted">(Review deleted by moderator) - not visible for users<br/><br/>' + \
                            _body + '</div>'
                else:
                    _body = '<div class="webcomment_deleted">(Review deleted by author) - not visible for users<br/><br/>' + \
                            _body + '</div>'
                links = '<a href="' + undelete_link + '">' + _("Undelete review") + '</a>'
            else:
                if status == 'dm':
                    _body = '<i>Review deleted by moderator</i>'
                else:
                    _body = '<i>Review deleted by author</i>'
                links = ''
        else:
            if not auth_code:
                links += '<a href="' + delete_links['mod'] + '">' + _("Delete review") + '</a>'
            if nb_reports >= CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN:
                if not auth_code:
                    _body = '<div class="webcomment_reported">(Review reported. Pending approval) - not visible for users<br/><br/>' + _body + '</div>'
                    links += ' | '
                    links += '<a href="' + unreport_link + '">' + _("Unreport review") + '</a>'
                else:
                    _body = '<i>This review is pending approval due to user reports.</i>'
                    links = ''

        out += '''
<div class="webcomment_review">
  <img src="%(siteurl)s/img/%(star_score_img)s" alt="%(star_score)s" /> <b>%(title)s</b><br/>
  <small>%(reviewed_label)s</small><br/>
  <small>%(useful_label)s</small><br/>
  %(body)s
  %(abuse)s</div>''' % {'siteurl'       : CFG_SITE_URL,
                        'star_score_img': star_score_img,
                        'star_score'    : star_score,
                        'title'         : title,
                        'reviewed_label': reviewed_label,
                        'useful_label'  : useful_label,
                        'body'          : _body,
                        'abuse'         : links
                        }
        return out
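    # Row shapes consumed below, matching the c_* index maps at the top of
    # tmpl_get_comments (indicative): review rows carry 14 fields,
    #
    #     (nickname, user_id, date, body, status, nb_reports, nb_votes_yes,
    #      nb_votes_total, star_score, title, id, round_name, restriction,
    #      reply_to)
    #
    # while plain comment rows carry 10, ending in (id, round_name,
    # restriction, reply_to).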
current_user_fullname = "" override_nickname_p = False if CFG_CERN_SITE: from invenio.search_engine import get_all_collections_of_a_record user_info = collect_user_info(uid) if 'atlas-readaccess-active-members [CERN]' in user_info['group']: # An ATLAS member is never anonymous to its colleagues # when commenting inside ATLAS collections recid_collections = get_all_collections_of_a_record(recID) if 'ATLAS' in str(recid_collections): override_nickname_p = True current_user_fullname = user_info.get('external_fullname', '') # CERN hack ends # naming data fields of comments if reviews: c_nickname = 0 c_user_id = 1 c_date_creation = 2 c_body = 3 c_status = 4 c_nb_reports = 5 c_nb_votes_yes = 6 c_nb_votes_total = 7 c_star_score = 8 c_title = 9 c_id = 10 c_round_name = 11 c_restriction = 12 reply_to = 13 discussion = 'reviews' comments_link = '%s (%i)' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, _('Comments'), total_nb_comments) reviews_link = '%s (%i)' % (_('Reviews'), total_nb_reviews) add_comment_or_review = self.tmpl_add_comment_form_with_ranking(recID, uid, current_user_fullname or nickname, ln, '', score, note, warnings, show_title_p=True, can_attach_files=can_attach_files) else: c_nickname = 0 c_user_id = 1 c_date_creation = 2 c_body = 3 c_status = 4 c_nb_reports = 5 c_id = 6 c_round_name = 7 c_restriction = 8 reply_to = 9 discussion = 'comments' comments_link = '%s (%i)' % (_('Comments'), total_nb_comments) reviews_link = '%s (%i)' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, _('Reviews'), total_nb_reviews) add_comment_or_review = self.tmpl_add_comment_form(recID, uid, nickname, ln, note, warnings, can_attach_files=can_attach_files, user_is_subscribed_to_discussion=user_is_subscribed_to_discussion) # voting links useful_dict = { 'siteurl' : CFG_SITE_URL, 'CFG_SITE_RECORD' : CFG_SITE_RECORD, 'recID' : recID, 'ln' : ln, 'do' : display_order, 'ds' : display_since, 'nb' : nb_per_page, 'p' : page, 'reviews' : reviews, 'discussion' : discussion } useful_yes = '' + _("Yes") + '' useful_yes %= useful_dict useful_no = '' + _("No") + '' useful_no %= useful_dict warnings = self.tmpl_warnings(warnings, ln) link_dic = { 'siteurl' : CFG_SITE_URL, 'CFG_SITE_RECORD' : CFG_SITE_RECORD, 'module' : 'comments', 'function' : 'index', 'discussion': discussion, 'arguments' : 'do=%s&ds=%s&nb=%s' % (display_order, display_since, nb_per_page), 'arg_page' : '&p=%s' % page, 'page' : page, 'rec_id' : recID} if not req: req = None ## comments table comments_rows = '' last_comment_round_name = None comment_round_names = [comment[0] for comment in comments] if comment_round_names: last_comment_round_name = comment_round_names[-1] for comment_round_name, comments_list in comments: comment_round_style = "display:none;" comment_round_is_open = False if comment_round_name in display_comment_rounds: comment_round_is_open = True comment_round_style = "" comments_rows += '
    ' % (comment_round_name) if not comment_round_is_open and \ (comment_round_name or len(comment_round_names) > 1): new_cmtgrp = list(display_comment_rounds) new_cmtgrp.append(comment_round_name) comments_rows += '''Open group ' comments_rows += _('%(x_nb)i comments for round "%(x_name)s"') % {'x_nb': len(comments_list), 'x_name': comment_round_name} + "
    " elif comment_round_name or len(comment_round_names) > 1: new_cmtgrp = list(display_comment_rounds) new_cmtgrp.remove(comment_round_name) comments_rows += '''Close group ' comments_rows += _('%(x_nb)i comments for round "%(x_name)s"') % {'x_nb': len(comments_list), 'x_name': comment_round_name}+ "
    " comments_rows += '
    ' % (comment_round_name, comment_round_style) thread_history = [0] for comment in comments_list: if comment[reply_to] not in thread_history: # Going one level down in the thread thread_history.append(comment[reply_to]) depth = thread_history.index(comment[reply_to]) else: depth = thread_history.index(comment[reply_to]) thread_history = thread_history[:depth + 1] # CERN hack begins: display full ATLAS user name. comment_user_fullname = "" if CFG_CERN_SITE and override_nickname_p: comment_user_fullname = get_email(comment[c_user_id]) # CERN hack ends if comment[c_nickname]: _nickname = comment[c_nickname] display = _nickname else: (uid, _nickname, display) = get_user_info(comment[c_user_id]) messaging_link = self.create_messaging_link(_nickname, comment_user_fullname or display, ln) from invenio.webcomment import get_attached_files # FIXME files = get_attached_files(recID, comment[c_id]) # do NOT delete the HTML comment below. It is used for parsing... (I plead unguilty!) comments_rows += """
    """ % (depth*20) delete_links = {} if not reviews: report_link = '%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/comments/report?ln=%(ln)s&comid=%%(comid)s&do=%(do)s&ds=%(ds)s&nb=%(nb)s&p=%(p)s&referer=%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/comments/display' % useful_dict % {'comid':comment[c_id]} reply_link = '%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/comments/add?ln=%(ln)s&action=REPLY&comid=%%(comid)s' % useful_dict % {'comid':comment[c_id]} delete_links['mod'] = "%s/admin/webcomment/webcommentadmin.py/del_single_com_mod?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) delete_links['auth'] = "%s/admin/webcomment/webcommentadmin.py/del_single_com_auth?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) undelete_link = "%s/admin/webcomment/webcommentadmin.py/undel_com?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) unreport_link = "%s/admin/webcomment/webcommentadmin.py/unreport_com?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) comments_rows += self.tmpl_get_comment_without_ranking(req, ln, messaging_link, comment[c_user_id], comment[c_date_creation], comment[c_body], comment[c_status], comment[c_nb_reports], reply_link, report_link, undelete_link, delete_links, unreport_link, recID, comment[c_id], files) else: report_link = '%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/reviews/report?ln=%(ln)s&comid=%%(comid)s&do=%(do)s&ds=%(ds)s&nb=%(nb)s&p=%(p)s&referer=%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/reviews/display' % useful_dict % {'comid': comment[c_id]} delete_links['mod'] = "%s/admin/webcomment/webcommentadmin.py/del_single_com_mod?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) delete_links['auth'] = "%s/admin/webcomment/webcommentadmin.py/del_single_com_auth?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) undelete_link = "%s/admin/webcomment/webcommentadmin.py/undel_com?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) unreport_link = "%s/admin/webcomment/webcommentadmin.py/unreport_com?ln=%s&id=%s" % (CFG_SITE_URL, ln, comment[c_id]) comments_rows += self.tmpl_get_comment_with_ranking(req, ln, messaging_link, comment[c_user_id], comment[c_date_creation], comment[c_body], comment[c_status], comment[c_nb_reports], comment[c_nb_votes_total], comment[c_nb_votes_yes], comment[c_star_score], comment[c_title], report_link, delete_links, undelete_link, unreport_link, recID) helpful_label = _("Was this review helpful?") report_abuse_label = "(" + _("Report abuse") + ")" yes_no_separator = ' / ' if comment[c_nb_reports] >= CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN or comment[c_status] in ['dm', 'da']: report_abuse_label = "" helpful_label = "" useful_yes = "" useful_no = "" yes_no_separator = "" comments_rows += """ %(yes_no_separator)s
    %(helpful_label)s %(tab)s %(yes)s %(no)s %(tab)s%(tab)s%(report_abuse_label)s
    """ \ % {'helpful_label': helpful_label, 'yes' : useful_yes % {'comid':comment[c_id]}, 'yes_no_separator': yes_no_separator, 'no' : useful_no % {'comid':comment[c_id]}, 'report' : report_link % {'comid':comment[c_id]}, 'report_abuse_label': comment[c_nb_reports] >= CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN and '' or report_abuse_label, 'tab' : ' '*2} # do NOT remove HTML comment below. It is used for parsing... comments_rows += """
    """ comments_rows += '
    ' ## page links page_links = '' # Previous if page != 1: link_dic['arg_page'] = 'p=%s' % (page - 1) page_links += '<< ' % link_dic else: page_links += ' %s ' % (' '*(len(_('Previous'))+7)) # Page Numbers for i in range(1, nb_pages+1): link_dic['arg_page'] = 'p=%s' % i link_dic['page'] = '%s' % i if i != page: page_links += ''' %(page)s ''' % link_dic else: page_links += ''' %s ''' % i # Next if page != nb_pages: link_dic['arg_page'] = 'p=%s' % (page + 1) page_links += ''' >> ''' % link_dic else: page_links += '%s' % (' '*(len(_('Next'))+7)) ## stuff for ranking if enabled if reviews: if avg_score > 0: avg_score_img = 'stars-' + str(avg_score).split('.')[0] + '-' + str(avg_score).split('.')[1] + '.png' else: avg_score_img = "stars-0-0.png" ranking_average = '
    ' ranking_average += _("Average review score: %(x_nb_score)s based on %(x_nb_reviews)s reviews") % \ {'x_nb_score': '' + str(avg_score) + '', 'x_nb_reviews': str(total_nb_reviews)} ranking_average += '
    ' else: ranking_average = "" write_button_link = '''%s/%s/%s/%s/add''' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, discussion) write_button_form = '' write_button_form = self.createhiddenform(action=write_button_link, method="get", text=write_button_form, button = reviews and _('Write a review') or _('Write a comment')) if reviews: total_label = _("There is a total of %s reviews") else: total_label = _("There is a total of %s comments") total_label %= total_nb_comments review_or_comment_first = '' if reviews == 0 and total_nb_comments == 0 and can_send_comments: review_or_comment_first = _("Start a discussion about any aspect of this document.") + '
    ' elif reviews == 1 and total_nb_reviews == 0 and can_send_comments: review_or_comment_first = _("Be the first to review this document.") + '
    ' # do NOT remove the HTML comments below. Used for parsing body = ''' %(comments_and_review_tabs)s
    %(comments_rows)s
    %(review_or_comment_first)s
    ''' % \ { 'record_label': _("Record"), 'back_label': _("Back to search results"), 'total_label': total_label, 'write_button_form' : write_button_form, 'write_button_form_again' : total_nb_comments>3 and write_button_form or "", 'comments_rows' : comments_rows, 'total_nb_comments' : total_nb_comments, 'comments_or_reviews' : reviews and _('review') or _('comment'), 'comments_or_reviews_title' : reviews and _('Review') or _('Comment'), 'siteurl' : CFG_SITE_URL, 'module' : "comments", 'recid' : recID, 'ln' : ln, 'border' : border, 'ranking_avg' : ranking_average, 'comments_and_review_tabs' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \ CFG_WEBCOMMENT_ALLOW_COMMENTS and \ '%s | %s
    ' % \ (comments_link, reviews_link) or '', 'review_or_comment_first' : review_or_comment_first } # form is not currently used. reserved for an eventual purpose #form = """ # Display # comments per page that are # and sorted by # """ % \ # (reviews==1 and ''' # # # # # ''' or ''' # ''') # #form_link = "%(siteurl)s/%(module)s/%(function)s" % link_dic #form = self.createhiddenform(action=form_link, method="get", text=form, button='Go', recid=recID, p=1) pages = """
    %(v_label)s %(comments_or_reviews)s %(results_nb_lower)s-%(results_nb_higher)s
    %(page_links)s
    """ % \ {'v_label': _("Viewing"), 'page_links': _("Page:") + page_links , 'comments_or_reviews': reviews and _('review') or _('comment'), 'results_nb_lower': len(comments)>0 and ((page-1) * nb_per_page)+1 or 0, 'results_nb_higher': page == nb_pages and (((page-1) * nb_per_page) + len(comments)) or (page * nb_per_page)} if nb_pages > 1: #body = warnings + body + form + pages body = warnings + body + pages else: body = warnings + body if reviews == 0: if not user_is_subscribed_to_discussion: body += '' body += '
    ' + '' % CFG_SITE_URL + \ ' ' + '' + create_html_link(urlbase=CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + \ str(recID) + '/comments/subscribe', urlargd={}, link_label=_('Subscribe')) + \ '' + ' to this discussion. You will then receive all new comments by email.' + '
    ' body += '

    ' elif user_can_unsubscribe_from_discussion: body += '' body += '
    ' + '' % CFG_SITE_URL + \ ' ' + '' + create_html_link(urlbase=CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + \ str(recID) + '/comments/unsubscribe', urlargd={}, link_label=_('Unsubscribe')) + \ '' + ' from this discussion. You will no longer receive emails about new comments.' + '
    ' body += '

    ' if can_send_comments: body += add_comment_or_review else: body += '
    ' + _("You are not authorized to comment or review.") + '' return '
    ' + body + '
    ' def create_messaging_link(self, to, display_name, ln=CFG_SITE_LANG): """prints a link to the messaging system""" link = "%s/yourmessages/write?msg_to=%s&ln=%s" % (CFG_SITE_URL, to, ln) if to: return '%s' % (link, display_name) else: return display_name def createhiddenform(self, action="", method="get", text="", button="confirm", cnfrm='', **hidden): """ create select with hidden values and submit button @param action: name of the action to perform on submit @param method: 'get' or 'post' @param text: additional text, can also be used to add non hidden input @param button: value/caption on the submit button @param cnfrm: if given, must check checkbox to confirm @param **hidden: dictionary with name=value pairs for hidden input @return: html form """ output = """
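    # Example (hypothetical values; output trimmed with '...'): a non-empty
    # recipient yields a link into the messaging system, an empty one
    # degrades to plain text --
    #
    #     >>> t.create_messaging_link('jdoe', 'John Doe', 'en')
    #     '<a href="...write?msg_to=jdoe&amp;ln=en" ...>John Doe</a>'
    #     >>> t.create_messaging_link('', 'Guest', 'en')
    #     'Guest'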
    """ % (action, method.lower().strip() in ['get', 'post'] and method or 'get') output += """
    """ output += text + '\n' if cnfrm: output += """ """ for key in hidden.keys(): if type(hidden[key]) is list: for value in hidden[key]: output += """ """ % (key, value) else: output += """ """ % (key, hidden[key]) output += """
    """ output += """ """ % (button, ) output += """
    """ return output def create_write_comment_hiddenform(self, action="", method="get", text="", button="confirm", cnfrm='', enctype='', form_id=None, form_name=None, **hidden): """ create select with hidden values and submit button @param action: name of the action to perform on submit @param method: 'get' or 'post' @param text: additional text, can also be used to add non hidden input @param button: value/caption on the submit button @param cnfrm: if given, must check checkbox to confirm @param form_id: HTML 'id' attribute of the form tag @param form_name: HTML 'name' attribute of the form tag @param **hidden: dictionary with name=value pairs for hidden input @return: html form """ enctype_attr = '' if enctype: enctype_attr = 'enctype=' + enctype output = """
    """ % \ (action, method.lower().strip() in ['get', 'post'] and method or 'get', enctype_attr, form_name and ' name="%s"' % form_name or '', form_id and ' id="%s"' % form_id or '') if cnfrm: output += """ """ for key in hidden.keys(): if type(hidden[key]) is list: for value in hidden[key]: output += """ """ % (key, value) else: output += """ """ % (key, hidden[key]) output += text + '\n' output += """
    """ return output def tmpl_warnings(self, warnings=[], ln=CFG_SITE_LANG): """ Display len(warnings) warning fields @param warnings: list of warning tuples (warning_text, warning_color) @param ln=language @return: html output """ if type(warnings) is not list: warnings = [warnings] warningbox = "" if warnings: for i in range(len(warnings)): warning_text = warnings[i][0] warning_color = warnings[i][1] if warning_color == 'green': span_class = 'exampleleader' else: span_class = 'important' warningbox += ''' %(warning)s
    ''' % \ { 'span_class' : span_class, 'warning' : warning_text } return warningbox else: return "" def tmpl_error(self, error, ln=CFG_SITE_LANG): """ Display error @param error: string @param ln=language @return: html output """ _ = gettext_set_language(ln) errorbox = "" if error != "": errorbox = "
    \n Error:\n" errorbox += "

    " errorbox += error + "

    " errorbox += "

    \n" return errorbox def tmpl_add_comment_form(self, recID, uid, nickname, ln, msg, warnings, textual_msg=None, can_attach_files=False, user_is_subscribed_to_discussion=False, reply_to=None): """ Add form for comments @param recID: record id @param uid: user id @param ln: language @param msg: comment body contents for when refreshing due to warning, or when replying to a comment @param textual_msg: same as 'msg', but contains the textual version in case user cannot display FCKeditor @param warnings: list of warning tuples (warning_text, warning_color) @param can_attach_files: if user can upload attach file to record or not @param user_is_subscribed_to_discussion: True if user already receives new comments by email @param reply_to: the ID of the comment we are replying to. None if not replying @return html add comment form """ _ = gettext_set_language(ln) link_dic = { 'siteurl' : CFG_SITE_URL, 'CFG_SITE_RECORD' : CFG_SITE_RECORD, 'module' : 'comments', 'function' : 'add', 'arguments' : 'ln=%s&action=%s' % (ln, 'SUBMIT'), 'recID' : recID} if textual_msg is None: textual_msg = msg # FIXME a cleaner handling of nicknames is needed. if not nickname: (uid, nickname, display) = get_user_info(uid) if nickname: note = _("Note: Your nickname, %s, will be displayed as author of this comment.") % ('' + nickname + '') else: (uid, nickname, display) = get_user_info(uid) link = '' % CFG_SITE_SECURE_URL note = _("Note: you have not %(x_url_open)sdefined your nickname%(x_url_close)s. %(x_nickname)s will be displayed as the author of this comment.") % \ {'x_url_open': link, 'x_url_close': '', 'x_nickname': '
    def tmpl_add_comment_form(self, recID, uid, nickname, ln, msg, warnings, textual_msg=None, can_attach_files=False, user_is_subscribed_to_discussion=False, reply_to=None):
        """
        Add form for comments
        @param recID: record id
        @param uid: user id
        @param ln: language
        @param msg: comment body contents for when refreshing due to warning, or when replying to a comment
        @param textual_msg: same as 'msg', but contains the textual version in case user cannot display FCKeditor
        @param warnings: list of warning tuples (warning_text, warning_color)
        @param can_attach_files: if user can upload attach file to record or not
        @param user_is_subscribed_to_discussion: True if user already receives new comments by email
        @param reply_to: the ID of the comment we are replying to. None if not replying
        @return html add comment form
        """
        _ = gettext_set_language(ln)
        link_dic = {'siteurl'        : CFG_SITE_URL,
                    'CFG_SITE_RECORD': CFG_SITE_RECORD,
                    'module'         : 'comments',
                    'function'       : 'add',
                    'arguments'      : 'ln=%s&amp;action=%s' % (ln, 'SUBMIT'),
                    'recID'          : recID}

        if textual_msg is None:
            textual_msg = msg

        # FIXME a cleaner handling of nicknames is needed.
        if not nickname:
            (uid, nickname, display) = get_user_info(uid)
        if nickname:
            note = _("Note: Your nickname, %s, will be displayed as author of this comment.") % ('<i>' + nickname + '</i>')
        else:
            (uid, nickname, display) = get_user_info(uid)
            link = '<a href="%s/youraccount/edit">' % CFG_SITE_SECURE_URL
            note = _("Note: you have not %(x_url_open)sdefined your nickname%(x_url_close)s. %(x_nickname)s will be displayed as the author of this comment.") % \
                   {'x_url_open' : link,
                    'x_url_close': '</a>',
                    'x_nickname' : '<i>' + display + '</i>'}

        if not CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR:
            note += '<br/>' + '&nbsp;'*10 + cgi.escape('You can use some HTML tags: <a href>, <strong>, <blockquote>, <br/>, <p>, <em>, <ul>, <li>, <b>, <i>')

        #from invenio.search_engine import print_record
        #record_details = print_record(recID=recID, format='hb', ln=ln)

        warnings = self.tmpl_warnings(warnings, ln)

        # Prepare file upload settings. We must enable file upload in
        # the fckeditor + a simple file upload interface (independant from editor)
        file_upload_url = None
        simple_attach_file_interface = ''
        if isGuestUser(uid):
            simple_attach_file_interface = "<small>%s</small><br/>" % _("Once logged in, authorized users can also attach files.")
        if can_attach_files:
            # Note that files can be uploaded only when user is logged in
            #file_upload_url = '%s/%s/%i/comments/attachments/put' % \
            #                  (CFG_SITE_URL, CFG_SITE_RECORD, recID)
            simple_attach_file_interface = '''
<div id="uploadcommentattachmentsinterface">
  <small>%(attach_msg)s: <em>(%(nb_files_limit_msg)s. %(file_size_limit_msg)s)</em></small><br/>
  <input class="multi max-%(CFG_WEBCOMMENT_MAX_ATTACHED_FILES)s" type="file" name="commentattachment[]"/><br/>
</div>
''' % \
            {'CFG_WEBCOMMENT_MAX_ATTACHED_FILES': CFG_WEBCOMMENT_MAX_ATTACHED_FILES,
             'attach_msg': CFG_WEBCOMMENT_MAX_ATTACHED_FILES == 1 and _("Optionally, attach a file to this comment") or \
                           _("Optionally, attach files to this comment"),
             'nb_files_limit_msg': CFG_WEBCOMMENT_MAX_ATTACHED_FILES == 1 and _("Max one file") or \
                                   _("Max %i files") % CFG_WEBCOMMENT_MAX_ATTACHED_FILES,
             'file_size_limit_msg': CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and _("Max %(x_nb_bytes)s per file") % {'x_nb_bytes': (CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE < 1024*1024 and (str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB') or (str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/(1024*1024)) + 'MB'))} or ''}

        editor = get_html_text_editor(name='msg',
                                      content=msg,
                                      textual_content=textual_msg,
                                      width='100%',
                                      height='400px',
                                      enabled=CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR,
                                      file_upload_url=file_upload_url,
                                      toolbar_set="WebComment")

        subscribe_to_discussion = ''
        if not user_is_subscribed_to_discussion:
            # Offer to subscribe to discussion
            subscribe_to_discussion = '<small><input type="checkbox" name="subscribe" id="subscribe"/><label for="subscribe">%s</label></small>' % _("Send me an email when a new comment is posted")

        form = """
<div id="comment-write">
  <h2>%(add_comment)s</h2>
  %(editor)s
  <br/>
  %(simple_attach_file_interface)s
  %(note)s<br/>
  %(subscribe_to_discussion)s<br/>
  %(reply_to)s
</div>
""" % {'note'                        : note,
       'record_label'                : _("Article") + ":",
       'comment_label'               : _("Comment") + ":",
       'add_comment'                 : _('Add comment'),
       'editor'                      : editor,
       'subscribe_to_discussion'     : subscribe_to_discussion,
       'reply_to'                    : reply_to and '<input type="hidden" name="comid" value="%s"/>' % reply_to or '',
       'simple_attach_file_interface': simple_attach_file_interface}
        form_link = "%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/comments/%(function)s?%(arguments)s" % link_dic
        form = self.create_write_comment_hiddenform(action=form_link, method="post", text=form, button='Add comment',
                                                    enctype='multipart/form-data', form_id='cmtForm',
                                                    form_name='cmtForm')
        form += '<br/>'
        return warnings + form + self.tmpl_page_do_not_leave_comment_page_js(ln=ln)
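    # The file_size_limit_msg arithmetic above, worked through on example
    # values (Python 2 integer division): with
    # CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880 bytes, 5242880 >=
    # 1024*1024 so the label is str(5242880/(1024*1024)) + 'MB', i.e. '5MB';
    # a 512000-byte limit would render as '500KB'.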
    def tmpl_add_comment_form_with_ranking(self, recID, uid, nickname, ln, msg, score, note, warnings, textual_msg=None, show_title_p=False, can_attach_files=False):
        """
        Add form for reviews
        @param recID: record id
        @param uid: user id
        @param ln: language
        @param msg: comment body contents for when refreshing due to warning
        @param textual_msg: the textual version of 'msg' when user cannot display FCKeditor
        @param score: review score
        @param note: review title
        @param warnings: list of warning tuples (warning_text, warning_color)
        @param show_title_p: if True, prefix the form with "Add Review" as title
        @param can_attach_files: if user can upload attach file to record or not
        @return: html add review form
        """
        _ = gettext_set_language(ln)
        link_dic = {'siteurl'        : CFG_SITE_URL,
                    'CFG_SITE_RECORD': CFG_SITE_RECORD,
                    'module'         : 'comments',
                    'function'       : 'add',
                    'arguments'      : 'ln=%s&amp;action=%s' % (ln, 'SUBMIT'),
                    'recID'          : recID}
        warnings = self.tmpl_warnings(warnings, ln)

        if textual_msg is None:
            textual_msg = msg

        #from search_engine import print_record
        #record_details = print_record(recID=recID, format='hb', ln=ln)

        if nickname:
            note_label = _("Note: Your nickname, %s, will be displayed as the author of this review.")
            note_label %= ('<i>' + nickname + '</i>')
        else:
            (uid, nickname, display) = get_user_info(uid)
            link = '<a href="%s/youraccount/edit">' % CFG_SITE_SECURE_URL
            note_label = _("Note: you have not %(x_url_open)sdefined your nickname%(x_url_close)s. %(x_nickname)s will be displayed as the author of this comment.") % \
                         {'x_url_open' : link,
                          'x_url_close': '</a>',
                          'x_nickname' : '<i>' + display + '</i>'}

        selected0 = ''
        selected1 = ''
        selected2 = ''
        selected3 = ''
        selected4 = ''
        selected5 = ''
        if score == 0:
            selected0 = ' selected="selected"'
        elif score == 1:
            selected1 = ' selected="selected"'
        elif score == 2:
            selected2 = ' selected="selected"'
        elif score == 3:
            selected3 = ' selected="selected"'
        elif score == 4:
            selected4 = ' selected="selected"'
        elif score == 5:
            selected5 = ' selected="selected"'

        ## file_upload_url = None
        ## if can_attach_files:
        ##     file_upload_url = '%s/%s/%i/comments/attachments/put' % \
        ##                       (CFG_SITE_URL, CFG_SITE_RECORD, recID)

        editor = get_html_text_editor(name='msg',
                                      content=msg,
                                      textual_content=msg,
                                      width='90%',
                                      height='400px',
                                      enabled=CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR,
                                      # file_upload_url=file_upload_url,
                                      toolbar_set="WebComment")

        form = """%(add_review)s
<table style="width: 100%%">
  <tr>
    <td>%(rate_label)s:
      <select name="score" size="1">
        <option value="0"%(selected0)s>-%(select_label)s-</option>
        <option value="1"%(selected1)s>1</option>
        <option value="2"%(selected2)s>2</option>
        <option value="3"%(selected3)s>3</option>
        <option value="4"%(selected4)s>4</option>
        <option value="5"%(selected5)s>5</option>
      </select>
    </td>
  </tr>
  <tr>
    <td>%(title_label)s: <input type="text" name="note" maxlength="250" value="%(note)s" /></td>
  </tr>
  <tr>
    <td>%(write_label)s:</td>
  </tr>
  <tr>
    <td>%(editor)s</td>
  </tr>
  <tr>
    <td>%(note_label)s</td>
  </tr>
</table>
""" % {'article_label': _('Article'),
       'rate_label'   : _("Rate this article"),
       'select_label' : _("Select a score"),
       'title_label'  : _("Give a title to your review"),
       'write_label'  : _("Write your review"),
       'note_label'   : note_label,
       'note'         : note != '' and note or "",
       'msg'          : msg != '' and msg or "",
       #'record'      : record_details
       'add_review'   : show_title_p and ('<br/><h2>' + _('Add review') + '</h2><br/>') or '',
       'selected0'    : selected0,
       'selected1'    : selected1,
       'selected2'    : selected2,
       'selected3'    : selected3,
       'selected4'    : selected4,
       'selected5'    : selected5,
       'editor'       : editor,
       }
        form_link = "%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/reviews/%(function)s?%(arguments)s" % link_dic
        form = self.createhiddenform(action=form_link, method="post", text=form, button=_('Add Review'))
        return warnings + form
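    # A compact equivalent of the selected0..selected5 chain above, for
    # illustration only (the verbose form is kept in the code):
    #
    #     selected = [''] * 6
    #     if score in range(6):
    #         selected[score] = ' selected="selected"'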

      ' else: if reviews: out = _("Your review was successfully added.") + '

      ' else: out = _("Your comment was successfully added.") + '

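# Sketch: the `cond and A or B` pattern used throughout this template
# predates Python 2.5's conditional expression; it equals `A if cond else B`
# only when A is truthy (safe here, since 'reviews' is a non-empty string):
def _discussion_path(reviews):
    """Return the URL fragment picked above for the discussion type."""
    return reviews == 1 and 'reviews' or 'comments'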
      ' link += "#%s" % success out += '' % link out += _('Back to record') + '' return out def tmpl_create_multiple_actions_form(self, form_name="", form_action="", method="get", action_display={}, action_field_name="", button_label="", button_name="", content="", **hidden): """ Creates an HTML form with a multiple choice of actions and a button to select it. @param form_action: link to the receiver of the formular @param form_name: name of the HTML formular @param method: either 'GET' or 'POST' @param action_display: dictionary of actions. action is HTML name (name of action) display is the string provided in the popup @param action_field_name: html name of action field @param button_label: what's written on the button @param button_name: html name of the button @param content: what's inside te formular @param **hidden: dictionary of name/value pairs of hidden fields. """ output = """
      """ % (form_action, method) output += """
      """ output += content + '\n' for key in hidden.keys(): if type(hidden[key]) is list: for value in hidden[key]: output += """ """ % (key, value) else: output += """ """ % (key, hidden[key]) output += """
      """ if type(action_display) is dict and len(action_display.keys()): output += """ """ output += """ """ % (button_label, button_name) output += """
      """ return output def tmpl_admin_index(self, ln): """ Index page """ # load the right message language _ = gettext_set_language(ln) out = '
        ' if CFG_WEBCOMMENT_ALLOW_COMMENTS or CFG_WEBCOMMENT_ALLOW_REVIEWS: if CFG_WEBCOMMENT_ALLOW_COMMENTS: out += '

        Comments status

        ' out += '
%(hot_cmt_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'hot_cmt_label': _("View most commented records")} out += '
%(latest_cmt_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'latest_cmt_label': _("View latest commented records")} out += '
%(reported_cmt_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'reported_cmt_label': _("View all comments reported as abuse")} if CFG_WEBCOMMENT_ALLOW_REVIEWS: out += '

        Reviews status

        ' out += '
%(hot_rev_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'hot_rev_label': _("View most reviewed records")} out += '
%(latest_rev_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'latest_rev_label': _("View latest reviewed records")} out += '
%(reported_rev_label)s
' % \ {'siteurl': CFG_SITE_URL, 'ln': ln, 'reported_rev_label': _("View all reviews reported as abuse")} #
%(delete_label)s
out += """

        General

%(view_users)s
%(guide)s
""" % {'siteurl' : CFG_SITE_URL, #'delete_label': _("Delete/Undelete comment(s) or suppress abuse report(s)"), 'view_users': _("View all users who have been reported"), 'ln' : ln, 'guide' : _("Guide")} else: out += _("Comments and reviews are disabled") + '
        ' out += '
      ' from invenio.bibrankadminlib import addadminbox return addadminbox('%s'% _("Menu"), [out]) def tmpl_admin_delete_form(self, ln, warnings): """ Display admin interface to fetch list of records to delete @param warnings: list of warning tuples (warning_text, warning_color) see tmpl_warnings, warning_color is optional """ # load the right message language _ = gettext_set_language(ln) warnings = self.tmpl_warnings(warnings, ln) out = '''
      %s

      '''% _("Please enter the ID of the comment/review so that you can view it before deciding whether to delete it or not") form = '''
      %s


      %s

      %s


      ''' % (_("Comment ID:"), _("Or enter a record ID to list all the associated comments/reviews:"), _("Record ID:")) form_link = "%s/admin/webcomment/webcommentadmin.py/delete?ln=%s" % (CFG_SITE_URL, ln) form = self.createhiddenform(action=form_link, method="get", text=form, button=_('View Comment')) return warnings + out + form def tmpl_admin_users(self, ln, users_data): """ @param users_data: tuple of ct, i.e. (ct, ct, ...) where ct is a tuple (total_number_reported, total_comments_reported, total_reviews_reported, total_nb_votes_yes_of_reported, total_nb_votes_total_of_reported, user_id, user_email, user_nickname) sorted by order of ct having highest total_number_reported """ _ = gettext_set_language(ln) u_reports = 0 u_comment_reports = 1 u_reviews_reports = 2 u_nb_votes_yes = 3 u_nb_votes_total = 4 u_uid = 5 u_email = 6 u_nickname = 7 if not users_data: return self.tmpl_warnings([(_("There have been no reports so far."), 'green')]) user_rows = "" for utuple in users_data: com_label = _("View all %s reported comments") % utuple[u_comment_reports] com_link = '''%s
      ''' % \ (CFG_SITE_URL, ln, utuple[u_uid], com_label) rev_label = _("View all %s reported reviews") % utuple[u_reviews_reports] rev_link = '''%s''' % \ (CFG_SITE_URL, ln, utuple[u_uid], rev_label) if not utuple[u_nickname]: user_info = get_user_info(utuple[u_uid]) nickname = user_info[2] else: nickname = utuple[u_nickname] if CFG_WEBCOMMENT_ALLOW_REVIEWS: review_row = """ %s %s %s""" review_row %= (utuple[u_nb_votes_yes], utuple[u_nb_votes_total] - utuple[u_nb_votes_yes], utuple[u_nb_votes_total]) else: review_row = '' user_rows += """ %(nickname)s %(email)s %(uid)s%(review_row)s %(reports)s %(com_link)s%(rev_link)s """ % { 'nickname' : nickname, 'email' : utuple[u_email], 'uid' : utuple[u_uid], 'reports' : utuple[u_reports], 'review_row': review_row, 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'com_link' : CFG_WEBCOMMENT_ALLOW_COMMENTS and com_link or "", 'rev_link' : CFG_WEBCOMMENT_ALLOW_REVIEWS and rev_link or "" } out = "
      " out += _("Here is a list, sorted by total number of reports, of all users who have had a comment reported at least once.") out += """

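# Illustrative users_data entry consumed by tmpl_admin_users() (the field
# order follows the u_* index constants above; the values are made up):
_example_users_data = ((5,                   # u_reports: total reports
                        3,                   # u_comment_reports
                        2,                   # u_reviews_reports
                        4,                   # u_nb_votes_yes
                        9,                   # u_nb_votes_total
                        42,                  # u_uid
                        'jdoe@example.org',  # u_email
                        'jdoe'),)            # u_nickname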
      \n' out += '\n' out += '\n' if CFG_WEBCOMMENT_ALLOW_REVIEWS > 0: out += '\n' out += '\n' out += '\n' out += '\n' out += '\n' out += """ %s
      """ out += _("Nickname") + '' + _("Email") + '' + _("User ID") + '' + _("Number positive votes") + '' + _("Number negative votes") + '' + _("Total number votes") + '' + _("Total number of reports") + '' + _("View all user's reported comments/reviews") + '
      """ % user_rows return out def tmpl_admin_select_comment_checkbox(self, cmt_id): """ outputs a checkbox named "comidXX" where XX is cmt_id """ return '' % int(cmt_id) def tmpl_admin_user_info(self, ln, nickname, uid, email): """ prepares informations about a user""" _ = gettext_set_language(ln) out = """ %(nickname_label)s: %(messaging)s
      %(uid_label)s: %(uid)i
      %(email_label)s: %(email)s""" out %= {'nickname_label': _("Nickname"), 'messaging': self.create_messaging_link(uid, nickname, ln), 'uid_label': _("User ID"), 'uid': int(uid), 'email_label': _("Email"), 'email': email} return out def tmpl_admin_review_info(self, ln, reviews, nb_reports, cmt_id, rec_id, status): """ outputs information about a review """ _ = gettext_set_language(ln) if reviews: reported_label = _("This review has been reported %i times") else: reported_label = _("This comment has been reported %i times") reported_label %= int(nb_reports) out = """ %(reported_label)s
      %(rec_id_label)s
      %(cmt_id_label)s""" out %= {'reported_label': reported_label, 'rec_id_label': _("Record") + ' #' + str(rec_id), 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD' : CFG_SITE_RECORD, 'rec_id': int(rec_id), 'cmt_id_label': _("Comment") + ' #' + str(cmt_id), 'ln': ln} if status in ['dm', 'da']: out += '
      Marked as deleted
' return out def tmpl_admin_latest(self, ln, comment_data, comments, error, user_collections, collection): """ @param comment_data: same type of tuple as that which is returned by webcommentadminlib.py/query_get_latest i.e. tuple (nickname, uid, date_creation, body, id) if latest comments or tuple (nickname, uid, date_creation, body, star_score, id) if latest reviews """ _ = gettext_set_language(ln) out = """ """ out += '
      ' % (CFG_SITE_URL, ln, comments) out += '' % ln out += '' % comments out += '
      Filter by collection:

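# Sketch: the loop below previews comment bodies with the naive slice
# cmt_tuple[3][:20] + '...', which appends dots even to short bodies; a
# hypothetical helper that only marks real truncation could read:
def _preview(body, length=20):
    """Return at most `length` characters of body, marking truncation."""
    if len(body) <= length:
        return body
    return body[:length] + '...'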
      ' if error == 1: out += "User is not authorized to view such collection.
      " return out elif error == 2: out += "There are no %s for this collection.
      " % (comments and 'comments' or 'reviews') return out out += """
        """ for (cmt_tuple, meta_data) in comment_data: bibrec_id = meta_data[3] content = format_record(bibrec_id, "hs") if not comments: out += """
%(content)s
        reviewed by %(user)s (%(stars)s) \"%(body)s\" on %(date)s

      2. """ % {'content': content, 'comment_url': CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(bibrec_id) + '/reviews', 'user':cmt_tuple[0] , 'stars': '*' * int(cmt_tuple[4]) , 'body': cmt_tuple[3][:20] + '...', 'date': cmt_tuple[2]} else: out += """
%(content)s
        commented by %(user)s, \"%(body)s\" on %(date)s

      4. """ % {'content': content, 'comment_url': CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(bibrec_id) + '/comments', 'user':cmt_tuple[0] , 'body': cmt_tuple[3][:20] + '...', 'date': cmt_tuple[2]} out += """
      """ return out def tmpl_admin_hot(self, ln, comment_data, comments, error, user_collections, collection): """ @param comment_data: same type of tuple as that which is return by webcommentadminlib.py/query_get_hot i.e. tuple (id_bibrec, date_last_comment, users, count) """ _ = gettext_set_language(ln) out = """ """ out += '
      ' % (CFG_SITE_URL, ln, comments) out += '' % ln out += '' % comments out += '
      Filter by collection:

      ' if error == 1: out += "User is not authorized to view such collection.
      " return out elif error == 2: out += "There are no %s for this collection.
      " % (comments and 'comments' or 'reviews') return out for cmt_tuple in comment_data: bibrec_id = cmt_tuple[0] content = format_record(bibrec_id, "hs") last_comment_date = cmt_tuple[1] total_users = cmt_tuple[2] total_comments = cmt_tuple[3] if comments: comment_url = CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(bibrec_id) + '/comments' str_comment = int(total_comments) > 1 and 'comments' or 'comment' else: comment_url = CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(bibrec_id) + '/reviews' str_comment = int(total_comments) > 1 and 'reviews' or 'review' out += """
    • %(content)s
      %(total_comments)s %(str_comment)s (%(total_users)s %(user)s), latest on %(last_comment_date)s

    • """ % {'content': content, 'comment_url': comment_url , 'total_comments': total_comments, 'str_comment': str_comment, 'total_users': total_users, 'user': int(total_users) > 1 and 'users' or 'user', 'last_comment_date': last_comment_date} out += """""" return out def tmpl_admin_comments(self, ln, uid, comID, recID, comment_data, reviews, error, user_collections, collection): """ @param comment_data: same type of tuple as that which is returned by webcomment.py/query_retrieve_comments_or_remarks i.e. tuple of comment where comment is tuple (nickname, date_creation, body, id) if ranking disabled or tuple (nickname, date_creation, body, nb_votes_yes, nb_votes_total, star_score, title, id) """ _ = gettext_set_language(ln) coll_form = """ """ coll_form += '
      ' % (CFG_SITE_URL, ln, reviews) coll_form += '' % ln coll_form += '' % reviews coll_form += '
      Filter by collection:

      ' if error == 1: coll_form += "User is not authorized to view such collection.
      " return coll_form elif error == 2: coll_form += "There are no %s for this collection.
      " % (reviews and 'reviews' or 'comments') return coll_form comments = [] comments_info = [] checkboxes = [] users = [] for (cmt_tuple, meta_data) in comment_data: if reviews: comments.append(self.tmpl_get_comment_with_ranking(None,#request object ln, cmt_tuple[0],#nickname cmt_tuple[1],#userid cmt_tuple[2],#date_creation cmt_tuple[3],#body cmt_tuple[9],#status 0, cmt_tuple[5],#nb_votes_total cmt_tuple[4],#nb_votes_yes cmt_tuple[6],#star_score cmt_tuple[7]))#title else: comments.append(self.tmpl_get_comment_without_ranking(None,#request object ln, cmt_tuple[0],#nickname cmt_tuple[1],#userid cmt_tuple[2],#date_creation cmt_tuple[3],#body cmt_tuple[5],#status 0, None, #reply_link None, #report_link None, #undelete_link None)) #delete_links users.append(self.tmpl_admin_user_info(ln, meta_data[0], #nickname meta_data[1], #uid meta_data[2]))#email if reviews: status = cmt_tuple[9] else: status = cmt_tuple[5] comments_info.append(self.tmpl_admin_review_info(ln, reviews, meta_data[5], # nb abuse reports meta_data[3], # cmt_id meta_data[4], # rec_id status)) # status checkboxes.append(self.tmpl_admin_select_comment_checkbox(meta_data[3])) form_link = "%s/admin/webcomment/webcommentadmin.py/del_com?ln=%s" % (CFG_SITE_URL, ln) out = """ """ % {'review_label': reviews and _("Review") or _("Comment"), 'written_by_label': _("Written by"), 'review_info_label': _("General informations"), 'select_label': _("Select")} for i in range (0, len(comments)): out += """ """ % (comments[i], users[i], comments_info[i], checkboxes[i]) out += """
      %(review_label)s %(written_by_label)s %(review_info_label)s %(select_label)s
      %s %s %s %s
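# Illustrative review tuple consumed by the loop above (indices follow the
# inline #nickname/#userid/... comments; the values are made up):
_example_cmt_tuple = ('jdoe',                 # [0] nickname
                      42,                     # [1] userid
                      '2011-05-01 12:00:00',  # [2] date_creation
                      'Great paper ...',      # [3] body
                      3,                      # [4] nb_votes_yes
                      5,                      # [5] nb_votes_total
                      4,                      # [6] star_score
                      'My review title',      # [7] title
                      None,                   # [8] not read by this template
                      'ok')                   # [9] status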
      """ if reviews: action_display = { 'delete': _('Delete selected reviews'), 'unreport': _('Suppress selected abuse report'), 'undelete': _('Undelete selected reviews') } else: action_display = { 'undelete': _('Undelete selected comments'), 'delete': _('Delete selected comments'), 'unreport': _('Suppress selected abuse report') } form = self.tmpl_create_multiple_actions_form(form_name="admin_comment", form_action=form_link, method="post", action_display=action_display, action_field_name='action', button_label=_("OK"), button_name="okbutton", content=out) if uid > 0: header = '
      ' if reviews: header += _("Here are the reported reviews of user %s") % uid else: header += _("Here are the reported comments of user %s") % uid header += '

      ' if comID > 0 and recID <= 0 and uid <= 0: if reviews: header = '
      ' +_("Here is review %s")% comID + '

      ' else: header = '
      ' +_("Here is comment %s")% comID + '

      ' if uid > 0 and comID > 0 and recID <= 0: if reviews: header = '
      ' + _("Here is review %(x_cmtID)s written by user %(x_user)s") % {'x_cmtID': comID, 'x_user': uid} else: header = '
      ' + _("Here is comment %(x_cmtID)s written by user %(x_user)s") % {'x_cmtID': comID, 'x_user': uid} header += '

      ' if comID <= 0 and recID <= 0 and uid <= 0: header = '
      ' if reviews: header += _("Here are all reported reviews sorted by the most reported") else: header += _("Here are all reported comments sorted by the most reported") header += "

      " elif recID > 0: header = '
      ' if reviews: header += _("Here are all reviews for record %i, sorted by the most reported" % recID) header += '
      %s' % (CFG_SITE_URL, recID, _("Show comments")) else: header += _("Here are all comments for record %i, sorted by the most reported" % recID) header += '
      %s' % (CFG_SITE_URL, recID, _("Show reviews")) header += "

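# Hypothetical input for tmpl_admin_del_com() defined below: del_res pairs
# each comment ID with a flag, where a value > 0 marks a successful delete:
#
#   del_res = [(101, 1),   # comment 101 was deleted
#              (102, 0)]   # comment 102 could not be deleted
#   html = self.tmpl_admin_del_com(del_res, ln='en')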
      " return coll_form + header + form def tmpl_admin_del_com(self, del_res, ln=CFG_SITE_LANG): """ @param del_res: list of the following tuple (comment_id, was_successfully_deleted), was_successfully_deleted is boolean (0=false, >0=true """ _ = gettext_set_language(ln) table_rows = '' for deltuple in del_res: table_rows += """ %s %s """ % (deltuple[0], deltuple[1]>0 and _("Yes") or "" +_("No") + "") out = """ %s
      %s %s
      """ % (_("comment ID"), _("successfully deleted"), table_rows) return out def tmpl_admin_undel_com(self, del_res, ln=CFG_SITE_LANG): """ @param del_res: list of the following tuple (comment_id, was_successfully_undeleted), was_successfully_undeleted is boolean (0=false, >0=true """ _ = gettext_set_language(ln) table_rows = '' for deltuple in del_res: table_rows += """ """ % (deltuple[0], deltuple[1]>0 and _("Yes") or "" +_("No") + "") out = """
      %s %s
      %s
      %s %s
      """ % (_("comment ID"), _("successfully undeleted"), table_rows) return out def tmpl_admin_suppress_abuse_report(self, del_res, ln=CFG_SITE_LANG): """ @param del_res: list of the following tuple (comment_id, was_successfully_deleted), was_successfully_deleted is boolean (0=false, >0=true """ _ = gettext_set_language(ln) table_rows = '' for deltuple in del_res: table_rows += """ """ % (deltuple[0], deltuple[1]>0 and _("Yes") or "" +_("No") + "") out = """
      %s %s
      %s
      %s %s
      """ % (_("comment ID"), _("successfully suppressed abuse report"), table_rows) return out def tmpl_mini_review(self, recID, ln=CFG_SITE_LANG, action='SUBMIT', avg_score=0, nb_comments_total=0): """Display the mini version of reviews (only the grading part)""" _ = gettext_set_language(ln) url = '%s/%s/%s/reviews/add?ln=%s&action=%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recID, ln, action) if avg_score > 0: score = _("Average review score: %(x_nb_score)s based on %(x_nb_reviews)s reviews") % \ {'x_nb_score': '%.1f' % avg_score, 'x_nb_reviews': nb_comments_total} else: score = '(' +_("Not yet reviewed") + ')' if avg_score == 5: s1, s2, s3, s4, s5 = 'full', 'full', 'full', 'full', 'full' elif avg_score >= 4.5: s1, s2, s3, s4, s5 = 'full', 'full', 'full', 'full', 'half' elif avg_score >= 4: s1, s2, s3, s4, s5 = 'full', 'full', 'full', 'full', '' elif avg_score >= 3.5: s1, s2, s3, s4, s5 = 'full', 'full', 'full', 'half', '' elif avg_score >= 3: s1, s2, s3, s4, s5 = 'full', 'full', 'full', '', '' elif avg_score >= 2.5: s1, s2, s3, s4, s5 = 'full', 'full', 'half', '', '' elif avg_score >= 2: s1, s2, s3, s4, s5 = 'full', 'full', '', '', '' elif avg_score >= 1.5: s1, s2, s3, s4, s5 = 'full', 'half', '', '', '' elif avg_score == 1: s1, s2, s3, s4, s5 = 'full', '', '', '', '' else: s1, s2, s3, s4, s5 = '', '', '', '', '' out = ''' %(rate)s:

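# Sketch of the threshold chain in tmpl_mini_review() above: every half
# point of avg_score adds half a star. This agrees with the chain for
# avg_score >= 1.5; below that, the original shows a star only for exactly 1:
def star_classes(avg_score):
    halves = int(avg_score * 2)        # floor to half-star units
    classes = []
    for i in range(5):
        if halves >= (i + 1) * 2:
            classes.append('full')     # whole star reached
        elif halves == i * 2 + 1:
            classes.append('half')     # trailing half star
        else:
            classes.append('')
    return classes  # star_classes(3.7) -> ['full', 'full', 'full', 'half', '']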
      Rate this document:
      1
      2
      3
       
%(score)s ''' % {'url': url, 'score': score, 'rate': _("Rate this document"), 's1': s1, 's2': s2, 's3': s3, 's4': s4, 's5': s5 } return out def tmpl_email_new_comment_header(self, recID, title, reviews, comID, report_numbers, can_unsubscribe=True, ln=CFG_SITE_LANG, uid=-1): """ Prints the email header used to notify subscribers that a new comment/review was added. @param recID: the ID of the commented/reviewed record @param title: the title of the commented/reviewed record @param reviews: True if it is a review, else it is a comment @param comID: the comment ID @param report_numbers: the report number(s) of the record @param can_unsubscribe: True if the user can unsubscribe from the alert @param ln: language """ # load the right message language _ = gettext_set_language(ln) user_info = collect_user_info(uid) out = _("Hello:") + '\n\n' + \ (reviews and _("The following review was sent to %(CFG_SITE_NAME)s by %(user_nickname)s:") or \ _("The following comment was sent to %(CFG_SITE_NAME)s by %(user_nickname)s:")) % \ {'CFG_SITE_NAME': CFG_SITE_NAME, 'user_nickname': user_info['nickname']} out += '\n(<%s>)' % (CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(recID)) out += '\n\n\n' return out def tmpl_email_new_comment_footer(self, recID, title, reviews, comID, report_numbers, can_unsubscribe=True, ln=CFG_SITE_LANG): """ Prints the email footer used to notify subscribers that a new comment/review was added. @param recID: the ID of the commented/reviewed record @param title: the title of the commented/reviewed record @param reviews: True if it is a review, else it is a comment @param comID: the comment ID @param report_numbers: the report number(s) of the record @param can_unsubscribe: True if the user can unsubscribe from the alert @param ln: language """ # load the right message language _ = gettext_set_language(ln) out = '\n\n-- \n' out += _("This is an automatic message, please don't reply to it.") out += '\n' out += _("To post another comment, go to <%(x_url)s> instead.") % \ {'x_url': CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(recID) + \ (reviews and '/reviews' or '/comments') + '/add'} out += '\n' if not reviews: out += _("To specifically reply to this comment, go to <%(x_url)s>") % \ {'x_url': CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(recID) + \ '/comments/add?action=REPLY&comid=' + str(comID)} out += '\n' if can_unsubscribe: out += _("To unsubscribe from this discussion, go to <%(x_url)s>") % \ {'x_url': CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' + str(recID) + \ '/comments/unsubscribe'} out += '\n' out += _("For any question, please use <%(CFG_SITE_SUPPORT_EMAIL)s>") % \ {'CFG_SITE_SUPPORT_EMAIL': CFG_SITE_SUPPORT_EMAIL} return out def tmpl_email_new_comment_admin(self, recID): """ Prints the record information used in the email to notify the system administrator that a new comment has been posted.
@param recID: the ID of the commented/reviewed record """ out = "" title = get_fieldvalues(recID, "245__a") authors = ', '.join(get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")) #res_author = "" #res_rep_num = "" #for author in authors: # res_author = res_author + ' ' + author dates = get_fieldvalues(recID, "260__c") report_nums = get_fieldvalues(recID, "037__a") report_nums += get_fieldvalues(recID, "088__a") report_nums = ', '.join(report_nums) #for rep_num in report_nums: # res_rep_num = res_rep_num + ', ' + rep_num out += " Title = %s \n" % (title and title[0] or "No Title") out += " Authors = %s \n" % authors if dates: out += " Date = %s \n" % dates[0] out += " Report number = %s" % report_nums return out def tmpl_page_do_not_leave_comment_page_js(self, ln): """ Code to ask user confirmation when leaving the page, so that the comment is not lost if clicking by mistake on links. @param ln: the user language """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' % {'message': _('Your comment will be lost.').replace('"', '\\"'), 'name': 'msg'} return out diff --git a/modules/websearch/lib/Makefile.am b/modules/websearch/lib/Makefile.am index f74181bfa..78b346812 100644 --- a/modules/websearch/lib/Makefile.am +++ b/modules/websearch/lib/Makefile.am @@ -1,45 +1,46 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. +## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/invenio pylib_DATA = \ websearchadminlib.py \ websearch_templates.py \ websearch_webinterface.py \ websearch_regression_tests.py \ search_engine.py \ search_engine_config.py \ search_engine_tests.py \ + search_engine_utils.py \ search_engine_query_parser.py \ search_engine_query_parser_tests.py \ websearch_webcoll.py \ websearchadmin_regression_tests.py \ websearch_external_collections.py \ search_engine_summarizer.py \ websearch_external_collections_config.py \ websearch_external_collections_getter.py \ websearch_external_collections_getter_tests.py \ websearch_external_collections_parser.py \ websearch_external_collections_searcher.py \ websearch_external_collections_templates.py \ websearch_external_collections_tests.py \ - websearch_external_collections_utils.py + websearch_external_collections_utils.py EXTRA_DIST = $(pylib_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py index ca071ce07..5d45ee482 100644 --- a/modules/websearch/lib/search_engine.py +++ b/modules/websearch/lib/search_engine.py @@ -1,5488 +1,5440 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 """Invenio Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __revision__ = "$Id$" ## import general modules: import cgi import cStringIO import copy import string import os import re import time import urllib import urlparse import zlib import sys if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 ## import Invenio stuff: from invenio.config import \ CFG_CERN_SITE, \ CFG_INSPIRE_SITE, \ CFG_OAI_ID_FIELD, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_CALL_BIBFORMAT, \ CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \ CFG_WEBSEARCH_FIELDS_CONVERT, \ CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \ CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_WEBSEARCH_SYNONYM_KBRS, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_LOGDIR, \ CFG_BIBFORMAT_HIDDEN_TAGS, \ CFG_SITE_URL, \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_SOLR_URL, \ CFG_SITE_RECORD from invenio.search_engine_config import InvenioWebSearchUnknownCollectionError, InvenioWebSearchWildcardLimitError +from invenio.search_engine_utils import get_fieldvalues from invenio.bibrecord import create_record, record_get_field_instances from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records, is_method_valid from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list from invenio.bibindex_engine_stemmer import stem from invenio.bibindex_engine_tokenizer import wash_author_name, author_name_requires_phrase_search from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box from invenio.bibknowledge import get_kbr_values from invenio.data_cacher import DataCacher from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search from invenio.access_control_admin import acc_get_action_id from invenio.access_control_config import VIEWRESTRCOLL, \ CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS from invenio.websearchadminlib import get_detailed_page_tabs from invenio.intbitset import intbitset as HitSet from invenio.dbquery import DatabaseError, deserialize_via_marshal, InvenioDbQueryWildcardLimitError from invenio.access_control_engine import acc_authorize_action from invenio.errorlib import register_exception from invenio.textutils import encode_for_xml, 
wash_for_utf8 from invenio.htmlutils import get_mathjax_header from invenio.htmlutils import nmtoken_from_string import invenio.template webstyle_templates = invenio.template.load('webstyle') webcomment_templates = invenio.template.load('webcomment') from invenio.bibrank_citation_searcher import get_cited_by_count, calculate_cited_by_list, \ calculate_co_cited_with_list, get_records_with_num_cites, get_self_cited_by, \ get_refersto_hitset, get_citedby_hitset from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box from invenio.dbquery import run_sql, run_sql_with_limit, \ get_table_update_time, Error from invenio.webuser import getUid, collect_user_info from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \ SpiresToInvenioSyntaxConverter from invenio import webinterface_handler_config as apache from invenio.solrutils import solr_get_bitset try: import invenio.template websearch_templates = invenio.template.load('websearch') except: pass from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL) ## global vars: cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? 
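# Sketch (an assumption, not shown in this hunk): the re_unicode_* tables
# defined just below back an accent-stripping helper along the lines of
# search_engine's strip_accents(); each precompiled pattern collapses one
# family of accented characters onto its base letter:
def _strip_accents_sketch(text):
    utext = unicode(text, 'utf-8')
    for pattern, base in ((re_unicode_lowercase_a, 'a'),
                          (re_unicode_lowercase_e, 'e'),
                          (re_unicode_lowercase_i, 'i'),
                          (re_unicode_lowercase_o, 'o'),
                          (re_unicode_lowercase_u, 'u')):
        utext = pattern.sub(base, utext)
    return utext.encode('utf-8')  # e.g. 'résumé' -> 'resume'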
## precompile some often-used regexp for speed reasons: re_word = re.compile('[\s]') re_quotes = re.compile('[\'\"]') re_doublequote = re.compile('\"') re_equal = re.compile('\=') re_logical_and = re.compile('\sand\s', re.I) re_logical_or = re.compile('\sor\s', re.I) re_logical_not = re.compile('\snot\s', re.I) re_operators = re.compile(r'\s([\+\-\|])\s') re_pattern_wildcards_after_spaces = re.compile(r'(\s)[\*\%]+') re_pattern_single_quotes = re.compile("'(.*?)'") re_pattern_double_quotes = re.compile("\"(.*?)\"") re_pattern_regexp_quotes = re.compile("\/(.*?)\/") re_pattern_spaces_after_colon = re.compile(r'(:\s+)') re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+') re_pattern_space = re.compile("__SPACE__") re_pattern_today = re.compile("\$TODAY\$") re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)') re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8")) re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8")) re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8")) re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8")) re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8")) re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8")) re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8")) re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8")) re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8")) re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8")) re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8")) re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8")) re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8")) re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8")) re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8")) re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?") re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?") re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?") re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?") re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?") re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?") re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?") re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?") re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?") re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?") re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?") re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?") re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?") re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?") re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?") re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?") re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?") re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?") class RestrictedCollectionDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = [] try: res = run_sql("""SELECT DISTINCT ar.value FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (VIEWRESTRCOLL_ID,)) except Exception: # database problems, return empty cache return [] for coll in res: 
ret.append(coll[0]) return ret def timestamp_verifier(): return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT')) DataCacher.__init__(self, cache_filler, timestamp_verifier) def collection_restricted_p(collection, recreate_cache_if_needed=True): if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() return collection in restricted_collection_cache.cache try: restricted_collection_cache.is_ok_p except Exception: restricted_collection_cache = RestrictedCollectionDataCacher() def ziplist(*lists): """Just like zip(), but returns lists of lists instead of lists of tuples Example: zip([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [(f1, p1, op1), (f2, p2, op2), (f3, p3, '')] ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [[f1, p1, op1], [f2, p2, op2], [f3, p3, '']] FIXME: This is handy to have, and should live somewhere else, like miscutil.really_useful_functions or something. XXX: Starting in python 2.6, the same can be achieved (faster) by using itertools.izip_longest(); when the minimum recommended Python is bumped, we should use that instead. """ def l(*items): return list(items) return map(l, *lists) def get_permitted_restricted_collections(user_info, recreate_cache_if_needed=True): """Return a list of collections that are restricted but for which the user is authorized.""" if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() ret = [] for collection in restricted_collection_cache.cache: if acc_authorize_action(user_info, 'viewrestrcoll', collection=collection)[0] == 0: ret.append(collection) return ret def get_all_restricted_recids(): """ Return the set of all the restricted recids, i.e. the ids of those records which belong to at least one restricted collection. """ ret = HitSet() for collection in restricted_collection_cache.cache: ret |= get_collection_reclist(collection) return ret def get_restricted_collections_for_recid(recid, recreate_cache_if_needed=True): """ Return the list of restricted collection names to which recid belongs. """ if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() collection_reclist_cache.recreate_cache_if_needed() return [collection for collection in restricted_collection_cache.cache if recid in get_collection_reclist(collection, recreate_cache_if_needed=False)] def is_user_owner_of_record(user_info, recid): """ Check if the user is the owner of the record, i.e. the submitter and/or a member of an owner-like group authorized to 'see' the record. @param user_info: the user_info dictionary that describes the user. @type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: True if the user is 'owner' of the record; False otherwise @rtype: bool """ authorized_emails_or_group = [] for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS: authorized_emails_or_group.extend(get_fieldvalues(recid, tag)) for email_or_group in authorized_emails_or_group: if email_or_group in user_info['group']: return True email = email_or_group.strip().lower() if user_info['email'].strip().lower() == email: return True return False def check_user_can_view_record(user_info, recid): """ Check if the user is authorized to view the given recid. The function grants access in two cases: either the user has author rights on this record, or he has view rights to the primary collection this record belongs to. @param user_info: the user_info dictionary that describes the user.
@type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: (0, '') when authorization is granted, (>0, 'message') when authorization is not granted @rtype: (int, string) """ if isinstance(recid, str): recid = int(recid) if record_public_p(recid): ## The record is already known to be public. return (0, '') ## At this point, either webcoll has not yet run or there are some ## restricted collections. Let's see first if the user owns the record. if is_user_owner_of_record(user_info, recid): ## Perfect! It's authorized then! return (0, '') restricted_collections = get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False) if restricted_collections: ## If there are restricted collections the user must be authorized to all of them for collection in get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collection) if auth_code: ## Ouch! the user is not authorized for this collection return (auth_code, auth_msg) ## OK! The user is authorized. return (0, '') if is_record_in_any_collection(recid, recreate_cache_if_needed=False): ## the record belongs to some collection and none of them is restricted return (0, '') elif record_exists(recid) > 0: ## We are in the case where webcoll has not run. ## Let's authorize SUPERADMIN (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=None) if auth_code == 0: return (0, '') else: ## Too bad. Let's print a nice message: return (1, """The record you are trying to access has just been submitted to the system and needs to be assigned to the proper collections. It is currently restricted for security reasons until the assignment is fully completed. Please come back later to properly access this record.""") else: ## The record either does not exist or has been deleted. ## Let's handle these situations outside of this code. return (0, '') class IndexStemmingDataCacher(DataCacher): """ Provides cache for stemming information for word/phrase indexes. This class is not to be used directly; use function get_index_stemming_language() instead. """ def __init__(self): def cache_filler(): try: res = run_sql("""SELECT id, stemming_language FROM idxINDEX""") except DatabaseError: # database problems, return empty cache return {} return dict(res) def timestamp_verifier(): return get_table_update_time('idxINDEX') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: index_stemming_cache.is_ok_p except Exception: index_stemming_cache = IndexStemmingDataCacher() def get_index_stemming_language(index_id, recreate_cache_if_needed=True): """Return stemming language for the given index.""" if recreate_cache_if_needed: index_stemming_cache.recreate_cache_if_needed() return index_stemming_cache.cache[index_id] class CollectionRecListDataCacher(DataCacher): """ Provides cache for collection reclist hitsets. This class is not to be used directly; use function get_collection_reclist() instead.
""" def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT name,reclist FROM collection") except Exception: # database problems, return empty cache return {} for name, reclist in res: ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) return ret def timestamp_verifier(): return get_table_update_time('collection') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_reclist_cache.is_ok_p: raise Exception except Exception: collection_reclist_cache = CollectionRecListDataCacher() def get_collection_reclist(coll, recreate_cache_if_needed=True): """Return hitset of recIDs that belong to the collection 'coll'.""" if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() if not collection_reclist_cache.cache[coll]: # not yet it the cache, so calculate it and fill the cache: set = HitSet() query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: set = HitSet(res[0][1]) except: pass collection_reclist_cache.cache[coll] = set # finally, return reclist: return collection_reclist_cache.cache[coll] class SearchResultsCache(DataCacher): """ Provides temporary lazy cache for Search Results. Useful when users click on `next page'. """ def __init__(self): def cache_filler(): return {} def timestamp_verifier(): return '1970-01-01 00:00:00' # lazy cache is always okay; # its filling is governed by # CFG_WEBSEARCH_SEARCH_CACHE_SIZE DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not search_results_cache.is_ok_p: raise Exception except Exception: search_results_cache = SearchResultsCache() class CollectionI18nNameDataCacher(DataCacher): """ Provides cache for I18N collection names. This class is not to be used directly; use function get_coll_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Exception: # database problems return {} for c, ln, i18nname in res: if i18nname: if not ret.has_key(c): ret[c] = {} ret[c][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('collectionname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_i18nname_cache.is_ok_p: raise Exception except Exception: collection_i18nname_cache = CollectionI18nNameDataCacher() def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted collection name (of the name type `ln' (=long name)) for collection C in language LN. This function uses collection_i18nname_cache, but it verifies whether the cache is up-to-date first by default. This verification step is performed by checking the DB table update time. So, if you call this function 1000 times, it can get very slow because it will do 1000 table update time verifications, even though collection names change not that often. Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to False, will assume the cache is already up-to-date. This is useful namely in the generation of collection lists for the search results page. """ if verify_cache_timestamp: collection_i18nname_cache.recreate_cache_if_needed() out = c try: out = collection_i18nname_cache.cache[c][ln] except KeyError: pass # translation in LN does not exist return out class FieldI18nNameDataCacher(DataCacher): """ Provides cache for I18N field names. 
This class is not to be used directly; use function get_field_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for f, ln, i18nname in res: if i18nname: if not ret.has_key(f): ret[f] = {} ret[f][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('fieldname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not field_i18nname_cache.is_ok_p: raise Exception except Exception: field_i18nname_cache = FieldI18nNameDataCacher() def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted field name (of type 'ln', 'long name') for field F in language LN. If VERIFY_CACHE_TIMESTAMP is set to True, then verify DB timestamp and field I18N name cache timestamp and refresh cache from the DB if needed. Otherwise don't bother checking DB timestamp and return the cached value. (This is useful when get_field_i18nname is called inside a loop.) """ if verify_cache_timestamp: field_i18nname_cache.recreate_cache_if_needed() out = f try: out = field_i18nname_cache.cache[f][ln] except KeyError: pass # translation in LN does not exist return out def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" out = [] res = run_sql("SELECT id,name FROM collection ORDER BY name ASC") for c_id, c_name in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c_name, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable out.append([c_name, c_printable]) return out def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" colls_nicely_ordered = [] res = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, )) for c, cid in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable colls_nicely_ordered.append([c, c_printable]) colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln) return colls_nicely_ordered def get_index_id_from_field(field): """ Return index id with name corresponding to FIELD, or the first index id where the logical field code named FIELD is indexed. Return zero in case there is no index defined for this field. Example: field='author', output=4. 
""" out = 0 if field == '': field = 'global' # empty string field means 'global' index (field 'anyfield') # first look in the index table: res = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,)) if res: out = res[0][0] return out # not found in the index table, now look in the field table: res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""", (field,)) if res: out = res[0][0] return out def get_words_from_pattern(pattern): "Returns list of whitespace-separated words from pattern." words = {} for word in string.split(pattern): if not words.has_key(word): words[word] = 1 return words.keys() def create_basic_search_units(req, p, f, m=None, of='hb'): """Splits search pattern and search field into a list of independently searchable units. - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where 'operator' is set union (|), set intersection (+) or set exclusion (-); 'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics'); 'field' is either a code like 'title' or MARC tag like '100__a'; 'type' is the search type ('w' for word file search, 'a' for access file search). - Optionally, the function accepts the match type argument 'm'. If it is set (e.g. from advanced search interface), then it performs this kind of matching. If it is not set, then a guess is made. 'm' can have values: 'a'='all of the words', 'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'. - Warnings are printed on req (when not None) in case of HTML output formats.""" opfts = [] # will hold (o,p,f,t,h) units # FIXME: quick hack for the journal index if f == 'journal': opfts.append(['+', p, f, 'w']) return opfts ## check arguments: is desired matching type set? if m: ## A - matching type is known; good! if m == 'e': # A1 - exact value: opfts.append(['+', p, f, 'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+', p, f, 'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+', word, f, 'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+', word, f, 'w']) # '+' in the first unit else: opfts.append(['|', word, f, 'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning") opfts.append(['+', "%" + p + "%", f, 'w']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0] == '"' and p[-1] == '"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+', p[1:-1], f, 'a']) elif f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor') and author_name_requires_phrase_search(p): ## B1 - do we search in author, and does 'p' contain space/comma/dot/etc? ## => doing washed ACC search opfts.append(['+', p, f, 'a']) elif f and p[0] == "'" and p[-1] == "'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? 
=> doing ACC search opfts.append(['+', '%' + p[1:-1] + '%', f, 'a']) elif f and p[0] == "/" and p[-1] == "/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search opfts.append(['+', p[1:-1], f, 'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+', p, f, 'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and starts by two digits? => doing ACC search opfts.append(['+', p, f, 'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # and spaces after colon as well: p = re_pattern_spaces_after_colon.sub(lambda x: string.replace(x.group(1), ' ', '__SPACE__'), p) # wash argument: p = re_equal.sub(":", p) p = re_logical_and.sub(" ", p) p = re_logical_or.sub(" |", p) p = re_logical_not.sub(" -", p) p = re_operators.sub(r' \1', p) for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... # secondly, determine search pattern and field: if string.find(pi, ":") > 0: fi, pi = string.split(pi, ":", 1) fi = wash_field(fi) # test whether fi is a real index code or a MARC-tag defined code: if fi in get_fieldcodes() or '00' <= fi[:2] <= '99': pass else: # it is not, so join it back: fi, pi = f, fi + ":" + pi else: fi, pi = f, pi # wash 'fi' argument: fi = wash_field(fi) # wash 'pi' argument: pi = pi.strip() # strip eventual spaces if re_quotes.match(pi): # B3a - quotes are found => do ACC search (phrase search) if pi[0] == '"' and pi[-1] == '"': pi = string.replace(pi, '"', '') # remove quote signs opfts.append([oi, pi, fi, 'a']) elif pi[0] == "'" and pi[-1] == "'": pi = string.replace(pi, "'", "") # remove quote signs opfts.append([oi, "%" + pi + "%", fi, 'a']) else: # unbalanced quotes, so fall back to WRD query: opfts.append([oi, pi, fi, 'w']) elif pi.startswith('/') and pi.endswith('/'): # B3b - pi has slashes around => do regexp search opfts.append([oi, pi[1:-1], fi, 'r']) elif fi and str(fi[0:2]).isdigit(): # B3c - fi exists and starts by two digits => do ACC search opfts.append([oi, pi, fi, 'a']) elif fi and not get_index_id_from_field(fi) and get_field_name(fi): # B3d - logical field fi exists but there is no WRD index for fi => try ACC search opfts.append([oi, pi, fi, 'a']) else: # B3e - general case => do WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): opfts.append([oi, pii, fi, 'w']) ## sanity check: for i in range(0, len(opfts)): try: pi = opfts[i][1] if pi == '*': if of.startswith("h"): print_warning(req, "Ignoring standalone wildcard word.", "Warning") del opfts[i] if pi == '' or pi == ' ': fi = opfts[i][2] if fi: if of.startswith("h"): print_warning(req, "Ignoring
empty %s search term." % fi, "Warning") del opfts[i] except: pass ## replace old logical field names if applicable: if CFG_WEBSEARCH_FIELDS_CONVERT: opfts = [[o,p,wash_field(f),t] for o,p,f,t in opfts] ## return search units: return opfts def page_start(req, of, cc, aas, ln, uid, title_message=None, description='', keywords='', recID=-1, tab='', p=''): "Start page according to given output format." _ = gettext_set_language(ln) if not req or isinstance(req, cStringIO.OutputType): return # we were called from CLI if not title_message: title_message = _("Search Results") content_type = get_output_format_content_type(of) if of.startswith('x'): if of == 'xr': # we are doing RSS output req.content_type = "application/rss+xml" req.send_http_header() req.write("""\n""") else: # we are doing XML output: req.content_type = "text/xml" req.send_http_header() req.write("""\n""") elif of.startswith('t') or str(of[0:3]).isdigit(): # we are doing plain text output: req.content_type = "text/plain" req.send_http_header() elif of == "id": pass # nothing to do, we shall only return list of recIDs elif content_type == 'text/html': # we are doing HTML output: req.content_type = "text/html" req.send_http_header() if not description: description = "%s %s." % (cc, _("Search Results")) if not keywords: keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False)) ## generate RSS URL: argd = {} if req.args: argd = cgi.parse_qs(req.args) rssurl = websearch_templates.build_rss_url(argd) ## add MathJax if displaying single records (FIXME: find ## eventual better place to this code) if of.lower() in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: metaheaderadd = get_mathjax_header() else: metaheaderadd = '' ## generate navtrail: navtrail = create_navtrail_links(cc, aas, ln) if navtrail != '': navtrail += ' > ' if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \ recID != -1: # If we are not in information tab in HD format, customize # the nav. trail to have a link back to main record. (Due # to the way perform_request_search() works, hb # (lowercase) is equal to hd) navtrail += ' %s' % \ (CFG_SITE_URL, CFG_SITE_RECORD, recID, title_message) if (of != '' or of.lower() != 'hd') and of != 'hb': # Export format_name = of query = "SELECT name FROM format WHERE code=%s" res = run_sql(query, (of,)) if res: format_name = res[0][0] navtrail += ' > ' + format_name else: # Discussion, citations, etc. tabs tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label'] navtrail += ' > ' + _(tab_label) else: navtrail += title_message if p: # we are serving search/browse results pages, so insert pattern: navtrail += ": " + cgi.escape(p) title_message = cgi.escape(p) + " - " + title_message body_css_classes = [] if cc: # we know the collection, lets allow page styles based on cc #collection names may not satisfy rules for css classes which #are something like: -?[_a-zA-Z]+[_a-zA-Z0-9-]* #however it isn't clear what we should do about cases with #numbers, so we leave them to fail. 
Everything else becomes "_" css = nmtoken_from_string(cc).replace('.','_').replace('-','_').replace(':','_') body_css_classes.append(css) ## finally, print page header: req.write(pageheaderonly(req=req, title=title_message, navtrail=navtrail, description=description, keywords=keywords, metaheaderadd=metaheaderadd, uid=uid, language=ln, navmenuid='search', navtrail_append_title_p=0, rssurl=rssurl, body_css_classes=body_css_classes)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) #else: # req.send_http_header() def page_end(req, of="hb", ln=CFG_SITE_LANG): "End page according to given output format: e.g. close XML tags, add HTML footer, etc." if of == "id": return [] # empty recID list if not req: return # we were called from CLI if of.startswith('h'): req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req)) return def create_page_title_search_pattern_info(p, p1, p2, p3): """Create the search pattern bit for the page web page HTML header. Basically combine p and (p1,p2,p3) together so that the page header may be filled whether we are in the Simple Search or Advanced Search interface contexts.""" out = "" if p: out = p else: out = p1 if p2: out += ' ' + p2 if p3: out += ' ' + p3 return out def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." _ = gettext_set_language(ln) box = "" # day box += """<select name="%sd">""" % name box += """<option value="">%s""" % _("any day") for day in range(1, 32): box += """<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day) box += """</select>""" # month box += """<select name="%sm">""" % name box += """<option value="">%s""" % _("any month") for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \ (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")), \ (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]: box += """<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month) box += """</select>""" # year box += """<select name="%sy">""" % name box += """<option value="">%s""" % _("any year") this_year = int(time.strftime("%Y", time.localtime())) for year in range(this_year-20, this_year+1): box += """<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year) box += """</select>""" return box def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action=""): """Create search box for 'search again in the results page' functionality.""" # load the right message language _ = gettext_set_language(ln) # some computations cc_intl = get_coll_i18nname(cc, ln, False) cc_colID = get_colID(cc) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln) else: colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln) colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != CFG_SITE_NAME: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' 
: CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }) # this field is used to remove the current collection from the ones to be searched. temp.append({ 'value' : '', 'text' : '*** %s ***' % _("remove this collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == re.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("add another collection") }] + colls_nice) else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading coll_selects.append([{ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }] + colls_nice) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code, name) in get_bibrank_methods(cc_colID, ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = [] query = """SELECT code,name FROM format WHERE visibility='1' ORDER BY name ASC""" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : _("HTML brief") }) # show collections in the search box? (not if there is only one # collection defined, and not if we are in light search) show_colls = True show_title = True if len(collection_reclist_cache.cache.keys()) == 1 or \ aas == -1: show_colls = False show_title = False if cc == CFG_SITE_NAME: show_title = False if CFG_INSPIRE_SITE: show_title = False return websearch_templates.tmpl_search_box( ln = ln, aas = aas, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, dt = dt, sort_fields = get_sortby_fields(ln=ln, colID=cc_colID), sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl, jrec = jrec, ec = ec, show_colls = show_colls, show_title = show_title, ) def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. 
""" dads = [] for dad in get_coll_ancestors(cc): if dad != CFG_SITE_NAME: # exclude Home collection dads.append ((dad, get_coll_i18nname(dad, ln, False))) if self_p and cc != CFG_SITE_NAME: dads.append((cc, get_coll_i18nname(cc, ln, False))) return websearch_templates.tmpl_navtrail_links( aas=aas, ln=ln, dads=dads) def get_searchwithin_fields(ln='en', colID=None): """Retrieves the fields name used in the 'search within' selection box for the collection ID colID.""" res = None if colID: res = run_sql("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: res = run_sql("SELECT code,name FROM field ORDER BY name ASC") fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln, False) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def get_sortby_fields(ln='en', colID=None): """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID.""" _ = gettext_set_language(ln) res = None if colID: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: # no sort fields defined for this colID, try to take Home collection: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (1,)) if not res: # no sort fields defined for the Home collection, take all sort fields defined wherever they are: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""",) fields = [{ 'value' : '', 'text' : _("latest first") }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="n"%s>%s </select> """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="e"%s>%s <option value="p"%s>%s <option value="r"%s>%s </select> """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." 
    if type(var) is int and type(fld) is int:
        if var == fld:
            return " selected"
    elif str(var) == str(fld):
        return " selected"
    elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]:
        return " selected"
    return ""

def wash_colls(cc, c, split_colls=0, verbose=0):
    """Wash collection list by checking whether user has deselected
       anything under 'Narrow search'.  Checks also if cc is a list or not.

       Return list of cc, colls_to_display, colls_to_search since the list
       of collections to display is different from that to search in.
       This is because users might have chosen 'split by collection'
       functionality.

       The behaviour of "collections to display" depends solely on whether
       the user has deselected a particular collection: e.g. if the user
       started from the 'Articles and Preprints' page and deselected
       'Preprints', then the collection to display is 'Articles'.  If
       nothing was deselected, then the collection to display is
       'Articles & Preprints'.

       The behaviour of "collections to search in" depends on the
       'split_colls' parameter:
            * if it is equal to 1, then we can wash the colls list down
              and search solely in the collection the user started from;
            * if it is equal to 0, then we are splitting to the first level
              of collections, i.e. collections as they appear on the page
              we started to search from;

       The function raises exception InvenioWebSearchUnknownCollectionError
       if cc or one of the c collections is not known.
    """

    colls_out = []
    colls_out_for_display = []
    # list to hold the hosted collections to be searched and displayed
    hosted_colls_out = []
    debug = ""

    if verbose:
        debug += "<br />"
        debug += "<br />1) --- initial parameters ---"
        debug += "<br />cc : %s" % cc
        debug += "<br />c : %s" % c
        debug += "<br />"

    # check what type is 'cc':
    if type(cc) is list:
        for ci in cc:
            if collection_reclist_cache.cache.has_key(ci):
                # yes this collection is real, so use it:
                cc = ci
                break
    else:
        # check once if cc is real:
        if not collection_reclist_cache.cache.has_key(cc):
            if cc:
                raise InvenioWebSearchUnknownCollectionError(cc)
            else:
                cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection

    # check type of 'c' argument:
    if type(c) is list:
        colls = c
    else:
        colls = [c]

    if verbose:
        debug += "<br />2) --- after checking the integrity of cc and whether c is a list ---"
        debug += "<br />cc : %s" % cc
        debug += "<br />c : %s" % c
        debug += "<br />"

    # remove all 'unreal' collections:
    colls_real = []
    for coll in colls:
        if collection_reclist_cache.cache.has_key(coll):
            colls_real.append(coll)
        else:
            if coll:
                raise InvenioWebSearchUnknownCollectionError(coll)
    colls = colls_real

    if verbose:
        debug += "<br />3) --- keeping only the real colls of c ---"
        debug += "<br />colls : %s" % colls
        debug += "<br />"

    # check if some real collections remain:
    if len(colls)==0:
        colls = [cc]

    if verbose:
        debug += "<br />4) --- in case no colls were left we use cc directly ---"
        debug += "<br />colls : %s" % colls
        debug += "<br />"

    # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll':
    res = run_sql("""SELECT c.name FROM collection AS c,
                                        collection_collection AS cc,
                                        collection AS ccc
                     WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                       AND ccc.name=%s AND cc.type='r'""", (cc,))

    # list that holds all the non restricted sons of cc that are also not hosted collections
    l_cc_nonrestricted_sons_and_nonhosted_colls = []
    res_hosted = run_sql("""SELECT c.name FROM collection AS c,
                                               collection_collection AS cc,
                                               collection AS ccc
                            WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                              AND ccc.name=%s AND cc.type='r'
                              AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,))
    for row_hosted in res_hosted:
        l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0])
    l_cc_nonrestricted_sons_and_nonhosted_colls.sort()

    l_cc_nonrestricted_sons = []
    l_c = colls
    for row in res:
        if not collection_restricted_p(row[0]):
            l_cc_nonrestricted_sons.append(row[0])
    l_c.sort()
    l_cc_nonrestricted_sons.sort()
    if l_cc_nonrestricted_sons == l_c:
        colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
    # the following elif is a hack that preserves the above functionality when we start searching from
    # the frontpage with some hosted collections deselected (either by default or manually)
    elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)):
        colls_out_for_display = colls
        split_colls = 0
    else:
        colls_out_for_display = colls # nope, we need to display all 'colls' successively

    # remove duplicates:
    #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
    #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups)
    colls_out_for_display = list(set(colls_out_for_display))

    if verbose:
        debug += "<br />5) --- decide whether colls_out_for_display should be colls or whether it is sufficient for it to be cc; remove duplicates ---"
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"

    # FIXME: The below quoted part of the code has been commented out
    # because it prevents searching in individual restricted daughter
    # collections when both parent and all its public daughter
    # collections were asked for, in addition to some restricted
    # daughter collections.  The removal was introduced for hosted
    # collections, so we may want to double check in this context.

    # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway
    # list to hold the collections to be removed
    #colls_to_be_removed = []
    # first calculate the collections that can safely be removed
    #for coll in colls_out_for_display:
    #    for ancestor in get_coll_ancestors(coll):
    #        #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll)
    #        if ancestor in colls_out_for_display and not is_hosted_collection(coll): colls_to_be_removed.append(coll)
    # secondly remove the collections
    #for coll in colls_to_be_removed:
    #    colls_out_for_display.remove(coll)

    if verbose:
        debug += "<br />6) --- remove collections that have ancestors about to be searched, unless they are hosted ---"
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"

    # calculate the hosted collections to be searched.
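    # For illustration (hypothetical collection tree): if cc was washed down
    # to [cc] and one of its sons has a dbquery starting with
    # "hostedcollection:", that son is collected into hosted_colls_out below
    # and later queried on the remote host instead of locally.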
if colls_out_for_display == [cc]: if is_hosted_collection(cc): hosted_colls_out.append(cc) else: for coll in get_coll_sons(cc): if is_hosted_collection(coll): hosted_colls_out.append(coll) else: for coll in colls_out_for_display: if is_hosted_collection(coll): hosted_colls_out.append(coll) if verbose: debug += "<br />7) --- calculate the hosted_colls_out ---" debug += "<br />hosted_colls_out : %s" % hosted_colls_out debug += "<br />" # second, let us decide on collection splitting: if split_colls == 0: # type A - no sons are wanted colls_out = colls_out_for_display else: # type B - sons (first-level descendants) are wanted for coll in colls_out_for_display: coll_sons = get_coll_sons(coll) if coll_sons == []: colls_out.append(coll) else: for coll_son in coll_sons: if not is_hosted_collection(coll_son): colls_out.append(coll_son) #else: # colls_out = colls_out + coll_sons # remove duplicates: #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1)) #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups) colls_out = list(set(colls_out)) if verbose: debug += "<br />8) --- calculate the colls_out; remove duplicates ---" debug += "<br />colls_out : %s" % colls_out debug += "<br />" # remove the hosted collections from the collections to be searched if hosted_colls_out: for coll in hosted_colls_out: try: colls_out.remove(coll) except ValueError: # in case coll was not found in colls_out pass if verbose: debug += "<br />9) --- remove the hosted_colls from the colls_out ---" debug += "<br />colls_out : %s" % colls_out return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug) def strip_accents(x): """Strip accents in the input phrase X (assumed in UTF-8) by replacing accented characters with their unaccented cousins (e.g. é by e). 
    Return such a stripped X."""
    x = re_latex_lowercase_a.sub("a", x)
    x = re_latex_lowercase_ae.sub("ae", x)
    x = re_latex_lowercase_e.sub("e", x)
    x = re_latex_lowercase_i.sub("i", x)
    x = re_latex_lowercase_o.sub("o", x)
    x = re_latex_lowercase_u.sub("u", x)
    x = re_latex_lowercase_y.sub("y", x)
    x = re_latex_lowercase_c.sub("c", x)
    x = re_latex_lowercase_n.sub("n", x)
    x = re_latex_uppercase_a.sub("A", x)
    x = re_latex_uppercase_ae.sub("AE", x)
    x = re_latex_uppercase_e.sub("E", x)
    x = re_latex_uppercase_i.sub("I", x)
    x = re_latex_uppercase_o.sub("O", x)
    x = re_latex_uppercase_u.sub("U", x)
    x = re_latex_uppercase_y.sub("Y", x)
    x = re_latex_uppercase_c.sub("C", x)
    x = re_latex_uppercase_n.sub("N", x)
    # convert input into Unicode string:
    try:
        y = unicode(x, "utf-8")
    except:
        return x # something went wrong, probably the input wasn't UTF-8
    # asciify Latin-1 lowercase characters:
    y = re_unicode_lowercase_a.sub("a", y)
    y = re_unicode_lowercase_ae.sub("ae", y)
    y = re_unicode_lowercase_e.sub("e", y)
    y = re_unicode_lowercase_i.sub("i", y)
    y = re_unicode_lowercase_o.sub("o", y)
    y = re_unicode_lowercase_u.sub("u", y)
    y = re_unicode_lowercase_y.sub("y", y)
    y = re_unicode_lowercase_c.sub("c", y)
    y = re_unicode_lowercase_n.sub("n", y)
    # asciify Latin-1 uppercase characters:
    y = re_unicode_uppercase_a.sub("A", y)
    y = re_unicode_uppercase_ae.sub("AE", y)
    y = re_unicode_uppercase_e.sub("E", y)
    y = re_unicode_uppercase_i.sub("I", y)
    y = re_unicode_uppercase_o.sub("O", y)
    y = re_unicode_uppercase_u.sub("U", y)
    y = re_unicode_uppercase_y.sub("Y", y)
    y = re_unicode_uppercase_c.sub("C", y)
    y = re_unicode_uppercase_n.sub("N", y)
    # return UTF-8 representation of the Unicode string:
    return y.encode("utf-8")

def wash_index_term(term, max_char_length=50, lower_term=True):
    """
    Return washed form of the index term TERM that would be suitable
    for storing into idxWORD* tables.  I.e., lower the TERM if
    LOWER_TERM is True, and truncate it safely to MAX_CHAR_LENGTH
    UTF-8 characters (meaning, in principle, 4*MAX_CHAR_LENGTH bytes).

    The function works by an internal conversion of TERM, when needed,
    from its input Python UTF-8 binary string format into Python
    Unicode format, and then truncating it safely to the given number
    of UTF-8 characters, without possible mis-truncation in the middle
    of a multi-byte UTF-8 character that could otherwise happen if we
    would have been working with UTF-8 binary representation directly.

    Note that MAX_CHAR_LENGTH corresponds to the length of the term
    column in idxINDEX* tables.
    """
    if lower_term:
        washed_term = unicode(term, 'utf-8').lower()
    else:
        washed_term = unicode(term, 'utf-8')
    if len(washed_term) <= max_char_length:
        # no need to truncate the term, because it will fit
        # nicely even if it uses four-byte UTF-8 characters
        return washed_term.encode('utf-8')
    else:
        # truncate the term in a safe position:
        return washed_term[:max_char_length].encode('utf-8')

def lower_index_term(term):
    """
    Return safely lowered index term TERM.  This is done by converting
    to UTF-8 first, because standard Python lower() function is not
    UTF-8 safe.  To be called by both the search engine and the
    indexer when appropriate (e.g. before stemming).

    In case of problems with UTF-8 compliance, this function raises
    UnicodeDecodeError, so the client code may want to catch it.
    """
    return unicode(term, 'utf-8').lower().encode('utf-8')

def get_synonym_terms(term, kbr_name, match_type):
    """
    Return list of synonyms for TERM by looking in KBR_NAME in
    MATCH_TYPE style.
@param term: search-time term or index-time term @type term: str @param kbr_name: knowledge base name @type kbr_name: str @param match_type: specifies how the term matches against the KBR before doing the lookup. Could be `exact' (default), 'leading_to_comma', `leading_to_number'. @type match_type: str @return: list of term synonyms @rtype: list of strings """ dterms = {} ## exact match is default: term_for_lookup = term term_remainder = '' ## but maybe match different term: if match_type == 'leading_to_comma': mmm = re.match(r'^(.*?)(\s*,.*)$', term) if mmm: term_for_lookup = mmm.group(1) term_remainder = mmm.group(2) elif match_type == 'leading_to_number': mmm = re.match(r'^(.*?)(\s*\d.*)$', term) if mmm: term_for_lookup = mmm.group(1) term_remainder = mmm.group(2) ## FIXME: workaround: escaping SQL wild-card signs, since KBR's ## exact search is doing LIKE query, so would match everything: term_for_lookup = term_for_lookup.replace('%', '\%') ## OK, now find synonyms: for kbr_values in get_kbr_values(kbr_name, searchkey=term_for_lookup, searchtype='e'): for kbr_value in kbr_values: dterms[kbr_value + term_remainder] = 1 ## return list of term synonyms: return dterms.keys() def wash_output_format(format): """Wash output format FORMAT. Currently only prevents input like 'of=9' for backwards-compatible format that prints certain fields only. (for this task, 'of=tm' is preferred)""" if str(format[0:3]).isdigit() and len(format) != 6: # asked to print MARC tags, but not enough digits, # so let's switch back to HTML brief default return 'hb' else: return format def wash_pattern(p): """Wash pattern passed by URL. Check for sanity of the wildcard by removing wildcards if they are appended to extremely short words (1-3 letters). TODO: instead of this approximative treatment, it will be much better to introduce a temporal limit, e.g. to kill a query if it does not finish in 10 seconds.""" # strip accents: # p = strip_accents(p) # FIXME: when available, strip accents all the time # add leading/trailing whitespace for the two following wildcard-sanity checking regexps: p = " " + p + " " # replace spaces within quotes by __SPACE__ temporarily: p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # get rid of unquoted wildcards after spaces: p = re_pattern_wildcards_after_spaces.sub("\\1", p) # get rid of extremely short words (1-3 letters with wildcards): #p = re_pattern_short_words.sub("\\1", p) # replace back __SPACE__ by spaces: p = re_pattern_space.sub(" ", p) # replace special terms: p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p) # remove unnecessary whitespace: p = string.strip(p) # remove potentially wrong UTF-8 characters: p = wash_for_utf8(p) return p def wash_field(f): """Wash field passed by URL.""" if f: # get rid of unnecessary whitespace and make it lowercase # (e.g. Author -> author) to better suit iPhone etc input # mode: f = f.strip().lower() # wash legacy 'f' field names, e.g. 
replace 'wau' or `au' by
        # 'author', if applicable:
        if CFG_WEBSEARCH_FIELDS_CONVERT:
            f = CFG_WEBSEARCH_FIELDS_CONVERT.get(f, f)
    return f

def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
    """
    Take user-submitted date arguments D1 (full datetime string) or
    (D1Y, D1M, D1D) year, month, day tuple and D2 or (D2Y, D2M, D2D)
    and return a (datetext1, datetext2) pair of datetime strings in the
    YYYY-MM-DD HH:MM:SS format suitable for time restricted searching.

    Note that when both D1 and (D1Y, D1M, D1D) parameters are present,
    the precedence goes to D1.  Ditto for D2*.

    Note that when (D1Y, D1M, D1D) are taken into account, some values
    may be missing and are completed e.g. to 01 or 12 according to
    whether it is the starting or the ending date.
    """
    datetext1, datetext2 = "", ""
    # sanity checking:
    if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0:
        return ("", "") # nothing selected, so return empty values
    # wash first (starting) date:
    if d1:
        # full datetime string takes precedence:
        datetext1 = d1
    else:
        # okay, first date passed as (year,month,day):
        if d1y:
            datetext1 += "%04d" % d1y
        else:
            datetext1 += "0000"
        if d1m:
            datetext1 += "-%02d" % d1m
        else:
            datetext1 += "-01"
        if d1d:
            datetext1 += "-%02d" % d1d
        else:
            datetext1 += "-01"
        datetext1 += " 00:00:00"
    # wash second (ending) date:
    if d2:
        # full datetime string takes precedence:
        datetext2 = d2
    else:
        # okay, second date passed as (year,month,day):
        if d2y:
            datetext2 += "%04d" % d2y
        else:
            datetext2 += "9999"
        if d2m:
            datetext2 += "-%02d" % d2m
        else:
            datetext2 += "-12"
        if d2d:
            datetext2 += "-%02d" % d2d
        else:
            datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in
                               # given month, but for our querying it's not
                               # needed, 31 will always do
        datetext2 += " 00:00:00"
    # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts:
    return (datetext1, datetext2)

def is_hosted_collection(coll):
    """Check if the given collection is a hosted one; i.e. its dbquery
    starts with hostedcollection:
    Returns True if it is, False if it's not or if the result is empty
    or if the query failed"""
    res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, ))
    try:
        return res[0][0].startswith("hostedcollection:")
    except:
        return False

def get_colID(c):
    "Return collection ID for collection name C.  Return None if no match found."
    colID = None
    res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
    if res:
        colID = res[0][0]
    return colID

def get_coll_normalised_name(c):
    """Returns normalised collection name (case sensitive) for collection name
       C (case insensitive).
       Returns None if no match found."""
    try:
        return run_sql("SELECT name FROM collection WHERE name=%s", (c,))[0][0]
    except:
        return None

def get_coll_ancestors(coll):
    "Returns a list of ancestors for collection 'coll'."
    coll_ancestors = []
    coll_ancestor = coll
    while 1:
        res = run_sql("""SELECT c.name FROM collection AS c
                          LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad
                          LEFT JOIN collection AS ccc ON ccc.id=cc.id_son
                          WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""",
                      (coll_ancestor,))
        if res:
            coll_name = res[0][0]
            coll_ancestors.append(coll_name)
            coll_ancestor = coll_name
        else:
            break
    # ancestors found, return reversed list:
    coll_ancestors.reverse()
    return coll_ancestors

def get_coll_sons(coll, type='r', public_only=1):
    """Return a list of sons (first-level descendants) of type 'type' for
       collection 'coll'.  If public_only, then return only non-restricted
       son collections.
""" coll_sons = [] query = "SELECT c.name FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE cc.type=%s AND ccc.name=%s" query += " ORDER BY cc.score DESC" res = run_sql(query, (type, coll)) for name in res: if not public_only or not collection_restricted_p(name[0]): coll_sons.append(name[0]) return coll_sons def get_coll_real_descendants(coll, type='_', get_hosted_colls=True): """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'. IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided that "A & B" has no associated database query defined. """ coll_sons = [] res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_son LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""", (coll, type,)) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: if get_hosted_colls: coll_sons.append(name) else: if not dbquery.startswith("hostedcollection:"): coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG): """Browse either biliographic phrases or words indexes, and display it.""" # load the right message language _ = gettext_set_language(ln) ## is p enclosed in quotes? (coming from exact search) if p.startswith('"') and p.endswith('"'): p = p[1:-1] p_orig = p ## okay, "real browse" follows: ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) ## do we search in words indexes? if not f: return browse_in_bibwords(req, p, f) index_id = get_index_id_from_field(f) if index_id != 0: coll = HitSet() for coll_name in colls: coll |= get_collection_reclist(coll_name) browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll) else: browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: phrase_hitset = HitSet() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union_update(phrase_hitsets[coll]) if len(phrase_hitset) > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, len(phrase_hitset)]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """<p>No match close to <em>%s</em> found in given collections. 
                #Please try different term.<p>Displaying matches in any collection...""" % p_orig)
        ## try to get nbhits for these phrases in any collection:
        for phrase in browsed_phrases:
            browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)])

    ## display results now:
    out = websearch_templates.tmpl_browse_pattern(
            f=f,
            fn=get_field_i18nname(get_field_name(f) or f, ln, False),
            ln=ln,
            browsed_phrases_in_colls=browsed_phrases_in_colls,
            colls=colls,
            rg=rg,
          )
    req.write(out)
    return

def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG):
    """Browse inside words indexes."""
    if not p:
        return
    _ = gettext_set_language(ln)

    urlargd = {}
    urlargd.update(req.argd)
    urlargd['action'] = 'search'

    nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0)

    req.write(websearch_templates.tmpl_search_in_bibwords(
        p = p,
        f = f,
        ln = ln,
        nearest_box = nearest_box
    ))
    return

def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       The function uses a multi-stage searching algorithm in case of no
       exact match found.  See the Search Internals document for
       detailed description.

       The 'ap' argument governs whether alternative patterns are to be
       used in case there is no direct hit for (p,f,m).  For example,
       whether to replace non-alphanumeric characters by spaces if it
       would give some hits.  See the Search Internals document for
       detailed description.  (ap=0 forbids the alternative pattern
       usage, ap=1 permits it.)

       The 'of' argument governs whether to print or not some
       information to the user in case of no match found.  (Usually it
       prints the information in case of HTML formats, otherwise it's
       silent).

       The 'verbose' argument controls the level of debugging information
       to be printed (0=least, 9=most).

       All the parameters are assumed to have been previously washed.

       This function is suitable as a mid-level API.
    """

    _ = gettext_set_language(ln)

    hitset_empty = HitSet()
    # sanity check:
    if not p:
        hitset_full = HitSet(trailing_bits=1)
        hitset_full.discard(0)
        # no pattern, so return all universe
        return hitset_full
    # search stage 1: break up arguments into basic search units:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units = create_basic_search_units(req, p, f, m, of)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units)))
        print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1))
    # search stage 2: do search for each search unit and verify hit presence:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units_hitsets = []
    #prepare hiddenfield-related..
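    # CFG_BIBFORMAT_HIDDEN_TAGS lists MARC tags that ordinary users may not
    # search by; unless the user is authorized to run bibedit, any basic
    # search unit addressing such a tag is emptied further below.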
myhiddens = CFG_BIBFORMAT_HIDDEN_TAGS can_see_hidden = False if req: user_info = collect_user_info(req) can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) if can_see_hidden: myhiddens = [] if CFG_INSPIRE_SITE and of.startswith('h'): # fulltext/caption search warnings for INSPIRE: fields_to_be_searched = [f for o,p,f,m in basic_search_units] if 'fulltext' in fields_to_be_searched: print_warning(req, _("Warning: full-text search is only available for a subset of papers mostly from 2006-2011.")) elif 'caption' in fields_to_be_searched: print_warning(req, _("Warning: figure caption search is only available for a subset of papers mostly from 2008-2011.")) for idx_unit in xrange(len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] if len(bsu_f) < 2 and not bsu_f == '': if of.startswith("h"): print_warning(req, _("There is no index %s. Searching for %s in all fields." % (bsu_f, bsu_p))) bsu_f = '' bsu_m = 'w' if of.startswith("h") and verbose: print_warning(req, _('Instead searching %s.' % str([bsu_o, bsu_p, bsu_f, bsu_m]))) try: basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m, wl) except InvenioWebSearchWildcardLimitError, excp: basic_search_unit_hitset = excp.res if of.startswith("h"): print_warning(req, _("Search term too generic, displaying only partial results...")) # FIXME: print warning if we use native full-text indexing if bsu_f == 'fulltext' and bsu_m != 'w' and of.startswith('h') and not CFG_SOLR_URL: print_warning(req, _("No phrase index available for fulltext yet, looking for word combination...")) #check that the user is allowed to search with this tag #if he/she tries it if bsu_f and len(bsu_f) > 1 and bsu_f[0].isdigit() and bsu_f[1].isdigit(): for htag in myhiddens: ltag = len(htag) samelenfield = bsu_f[0:ltag] if samelenfield == htag: #user searches by a hidden tag #we won't show you anything.. basic_search_unit_hitset = HitSet() if verbose >= 9 and of.startswith("h"): print_warning(req, "Pattern %s hitlist omitted since \ it queries in a hidden tag %s" % (repr(bsu_p), repr(myhiddens))) display_nearest_terms_box=False #..and stop spying, too. if verbose >= 9 and of.startswith("h"): print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset)) if len(basic_search_unit_hitset) > 0 or \ ap==0 or \ bsu_o=="|" or \ ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"): # stage 2-1: this basic search unit is retained, since # either the hitset is non-empty, or the approximate # pattern treatment is switched off, or the search unit # was joined by an OR operator to preceding/following # units so we do not require that it exists basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern: if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p) and bsu_f != 'refersto' and bsu_f != 'citedby': if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query? 
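                # a quoted pattern is treated as an exact-phrase (ACC) unit:
                # punctuation is collapsed to '*' so the phrase index can
                # still match it; unquoted word (WRD) units instead get
                # punctuation replaced by spaces in the else branch below.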
bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p) else: # it is WRD query bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p) if verbose and of.startswith('h') and req: print_warning(req, "Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m))) basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln, wl=wl) if len(basic_search_unit_hitset) > 0: # we retain the new unit instead if of.startswith('h'): print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \ {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>", 'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}) basic_search_units[idx_unit][1] = bsu_pn basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h') and display_nearest_terms_box: if req: if bsu_f == "recid": print_warning(req, _("Requested record does not seem to exist.")) else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h') and display_nearest_terms_box: if req: if bsu_f == "recid": print_warning(req, _("Requested record does not seem to exist.")) else: print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty if verbose and of.startswith("h"): t2 = os.times()[4] for idx_unit in range(0, len(basic_search_units)): print_warning(req, "Search stage 2: basic search unit %s gave %d hits." % (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit]))) print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1)) # search stage 3: apply boolean query for each search unit: if verbose and of.startswith("h"): t1 = os.times()[4] # let the initial set be the complete universe: hitset_in_any_collection = HitSet(trailing_bits=1) hitset_in_any_collection.discard(0) for idx_unit in xrange(len(basic_search_units)): this_unit_operation = basic_search_units[idx_unit][0] this_unit_hitset = basic_search_units_hitsets[idx_unit] if this_unit_operation == '+': hitset_in_any_collection.intersection_update(this_unit_hitset) elif this_unit_operation == '-': hitset_in_any_collection.difference_update(this_unit_hitset) elif this_unit_operation == '|': hitset_in_any_collection.union_update(this_unit_hitset) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error") if len(hitset_in_any_collection) == 0: # no hits found, propose alternative boolean query: if of.startswith('h') and display_nearest_terms_box: nearestterms = [] for idx_unit in range(0, len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] if bsu_p.startswith("%") and bsu_p.endswith("%"): bsu_p = "'" + bsu_p[1:-1] + "'" bsu_nbhits = len(basic_search_units_hitsets[idx_unit]) # create a similar query, but with the basic search unit only argd = {} argd.update(req.argd) argd['p'] = bsu_p argd['f'] = bsu_f nearestterms.append((bsu_p, bsu_nbhits, argd)) text = websearch_templates.tmpl_search_no_boolean_hits( ln=ln, nearestterms=nearestterms) print_warning(req, text) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection)) print_warning(req, "Search stage 3: execution took %.2f seconds." 
                  % (t2 - t1))
    return hitset_in_any_collection

def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' containing parenthesis within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       For more details on the parameters see 'search_pattern'
    """
    _ = gettext_set_language(ln)
    spires_syntax_converter = SpiresToInvenioSyntaxConverter()
    spires_syntax_query = False

    # if the pattern uses SPIRES search syntax, convert it to Invenio syntax
    if spires_syntax_converter.is_applicable(p):
        spires_syntax_query = True
        p = spires_syntax_converter.convert_query(p)

    # sanity check: do not call parenthesised parser for search terms
    # like U(1):
    if not re_pattern_parens.search(p):
        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)

    # Try searching with parentheses
    try:
        parser = SearchQueryParenthesisedParser()

        # get a hitset with all recids
        result_hitset = HitSet(trailing_bits=1)

        # parse the query. The result is list of [op1, expr1, op2, expr2, ..., opN, exprN]
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            print_warning(req, "Search stage 1: search_pattern_parenthesised() searched %s." % repr(p))
            print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result))
        # go through every pattern
        # calculate hitset for it
        # combine pattern's hitset with the result using the corresponding operator
        for index in xrange(0, len(parsing_result)-1, 2):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]

            if CFG_INSPIRE_SITE and spires_syntax_query:
                # setting ap=0 to turn off approximate matching for 0 results.
                # Doesn't work well in combinations.
                # FIXME: The right fix involves collecting statuses for each
                # hitset, then showing a nearest terms box exactly once,
                # outside this loop.
                ap = 0
                display_nearest_terms_box = False

            # obtain a hitset for the current pattern
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)

            # combine the current hitset with resulting hitset using the current operator
            if current_operator == '+':
                result_hitset = result_hitset & current_hitset
            elif current_operator == '-':
                result_hitset = result_hitset - current_hitset
            elif current_operator == '|':
                result_hitset = result_hitset | current_hitset
            else:
                assert False, "Unknown operator in search_pattern_parenthesised()"

        return result_hitset

    # If searching with parentheses fails, perform search ignoring parentheses
    except SyntaxError:

        print_warning(req, _("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."))

        # remove the parentheses in the query. Current implementation removes all the parentheses,
        # but it could be improved to remove only those that are not inside quotes
        p = p.replace('(', ' ')
        p = p.replace(')', ' ')

        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)

def search_unit(p, f=None, m=None, wl=0):
    """Search for basic search unit defined by pattern 'p' and field
       'f' and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.
Only wildcard and span queries are being detected inside 'p'. If CFG_WEBSEARCH_SYNONYM_KBRS is set and we are searching in one of the indexes that has defined runtime synonym knowledge base, then look up there and automatically enrich search results with results for synonyms. In case the wildcard limit (wl) is greater than 0 and this limit is reached an InvenioWebSearchWildcardLimitError will be raised. In case you want to call this function with no limit for the wildcard queries, wl should be 0. This function is suitable as a low-level API. """ ## create empty output results set: hitset = HitSet() if not p: # sanity checking return hitset ## eventually look up runtime synonyms: hitset_synonyms = HitSet() if CFG_WEBSEARCH_SYNONYM_KBRS.has_key(f): for p_synonym in get_synonym_terms(p, CFG_WEBSEARCH_SYNONYM_KBRS[f][0], CFG_WEBSEARCH_SYNONYM_KBRS[f][1]): if p_synonym != p: hitset_synonyms |= search_unit(p_synonym, f, m, wl) ## look up hits: if CFG_SOLR_URL and f == 'fulltext': # redirect to Solr/Lucene return search_unit_in_solr(p, f, m) if f == 'datecreated': hitset = search_unit_in_bibrec(p, p, 'c') elif f == 'datemodified': hitset = search_unit_in_bibrec(p, p, 'm') elif f == 'refersto': # we are doing search by the citation count hitset = search_unit_refersto(p) elif f == 'citedby': # we are doing search by the citation count hitset = search_unit_citedby(p) elif m == 'a' or m == 'r': # we are doing either phrase search or regexp search if f == 'fulltext': # FIXME: workaround for not having phrase index yet return search_pattern(None, p, f, 'w') index_id = get_index_id_from_field(f) if index_id != 0: hitset = search_unit_in_idxphrases(p, f, m, wl) else: hitset = search_unit_in_bibxxx(p, f, m, wl) elif p.startswith("cited:"): # we are doing search by the citation count hitset = search_unit_by_times_cited(p[6:]) else: # we are doing bibwords search by default hitset = search_unit_in_bibwords(p, f, m, wl=wl) ## merge synonym results and return total: hitset |= hitset_synonyms return hitset def search_unit_in_bibwords(word, f, m=None, decompress=zlib.decompress, wl=0): """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs.""" set = HitSet() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations limit_reached = 0 # flag for knowing if the query limit has been reached # deduce into which bibwordsX table we will search: stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield")) bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id stemming_language = get_index_stemming_language(index_id) else: return HitSet() # word index f does not exist # wash 'word' argument and run query: word = string.replace(word, '*', '%') # we now use '*' as the truncation character words = string.split(word, "->", 1) # check for span query if len(words) == 2: word0 = re_word.sub('', words[0]) word1 = re_word.sub('', words[1]) if stemming_language: word0 = lower_index_term(word0) word1 = lower_index_term(word1) word0 = stem(word0, stemming_language) word1 = stem(word1, stemming_language) try: res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX, (wash_index_term(word0), wash_index_term(word1)), wildcard_limit = wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: if f == 'journal': pass # 
FIXME: quick hack for the journal index else: word = re_word.sub('', word) if stemming_language: word = lower_index_term(word) word = stem(word, stemming_language) if string.find(word, '%') >= 0: # do we have wildcard in the word? if f == 'journal': # FIXME: quick hack for the journal index # FIXME: we can run a sanity check here for all indexes res = () else: try: res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX, (wash_index_term(word),), wildcard_limit = wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX, (wash_index_term(word),)) # fill the result set: for word, hitlist in res: hitset_bibwrd = HitSet(hitlist) # add the results: if set_used: set.union_update(hitset_bibwrd) else: set = hitset_bibwrd set_used = 1 #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the user raise InvenioWebSearchWildcardLimitError(set) # okay, return result set: return set def search_unit_in_idxphrases(p, f, type, wl=0): """Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. equals to 'r' for a regexp search).""" set = HitSet() # will hold output result set set_used = 0 # not-yet-used flag, to be able to circumvent set operations limit_reached = 0 # flag for knowing if the query limit has been reached use_query_limit = False # flag for knowing if to limit the query results or not # deduce in which idxPHRASE table we will search: idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: idxphraseX = "idxPHRASE%02dF" % index_id else: return HitSet() # phrase index f does not exist # detect query type (exact phrase, partial phrase, regexp): if type == 'r': query_addons = "REGEXP %s" query_params = (p,) use_query_limit = True else: p = string.replace(p, '*', '%') # we now use '*' as the truncation character ps = string.split(p, "->", 1) # check for span query: if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')): query_addons = "BETWEEN %s AND %s" query_params = (ps[0], ps[1]) use_query_limit = True else: if string.find(p, '%') > -1: query_addons = "LIKE %s" query_params = (p,) use_query_limit = True else: query_addons = "= %s" query_params = (p,) # special washing for fuzzy author index: if f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'): query_params_washed = () for query_param in query_params: query_params_washed += (wash_author_name(query_param),) query_params = query_params_washed # perform search: if use_query_limit: try: res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons), query_params, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons), query_params) # fill the result set: for word, hitlist in res: hitset_bibphrase = HitSet(hitlist) # add the results: if set_used: set.union_update(hitset_bibphrase) else: set = hitset_bibphrase set_used = 1 #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the user raise InvenioWebSearchWildcardLimitError(set) # okay, return result set: return set 
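# A minimal usage sketch for the phrase searcher above (hypothetical values,
# nothing here is executed): search_unit_in_idxphrases('ellis, j', 'author', 'e')
# looks up the exact phrase, while search_unit_in_idxphrases('a->m', 'author', 'e')
# is a span query translated into BETWEEN 'a' AND 'm' on the idxPHRASExxF table.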
def search_unit_in_bibxxx(p, f, type, wl=0): """Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found. The search type is defined by 'type' (e.g. equals to 'r' for a regexp search).""" # FIXME: quick hack for the journal index if f == 'journal': return search_unit_in_bibwords(p, f, wl=wl) p_orig = p # saving for eventual future 'no match' reporting limit_reached = 0 # flag for knowing if the query limit has been reached use_query_limit = False # flag for knowing if to limit the query results or not query_addons = "" # will hold additional SQL code for the query query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument) # wash arguments: f = string.replace(f, '*', '%') # replace truncation char '*' in field definition if type == 'r': query_addons = "REGEXP %s" query_params = (p,) use_query_limit = True else: p = string.replace(p, '*', '%') # we now use '*' as the truncation character ps = string.split(p, "->", 1) # check for span query: if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')): query_addons = "BETWEEN %s AND %s" query_params = (ps[0], ps[1]) use_query_limit = True else: if string.find(p, '%') > -1: query_addons = "LIKE %s" query_params = (p,) use_query_limit = True else: query_addons = "= %s" query_params = (p,) # construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) if not tl: # f index does not exist, nevermind pass # okay, start search: l = [] # will hold list of recID that matched for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # construct and run query: if t == "001": if query_addons.find('BETWEEN') > -1 or query_addons.find('=') > -1: # verify that the params are integers (to avoid returning record 123 when searching for 123foo) try: query_params = tuple(int(param) for param in query_params) except ValueError: return HitSet() if use_query_limit: try: res = run_sql_with_limit("SELECT id FROM bibrec WHERE id %s" % query_addons, query_params, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons, query_params) else: query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \ (bx, bibx, query_addons) if len(t) != 6 or t[-1:]=='%': # wildcard query, or only the beginning of field 't' # is defined, so add wildcard character: query += " AND bx.tag LIKE %s" query_params = query_params + (t + '%',) else: # exact query for 't': query += " AND bx.tag=%s" query_params = query_params + (t,) if use_query_limit: try: res = run_sql_with_limit(query, query_params, wildcard_limit=wl) except InvenioDbQueryWildcardLimitError, excp: res = excp.res limit_reached = 1 # set the limit reached flag to true else: res = run_sql(query, query_params) # fill the result set: for id_bibrec in res: if id_bibrec[0]: l.append(id_bibrec[0]) # check no of hits found: nb_hits = len(l) # okay, return result set: set = HitSet(l) #check to see if the query limit was reached if limit_reached: #raise an exception, so we can print a nice message to the 
user
        raise InvenioWebSearchWildcardLimitError(set)
    return set

def search_unit_in_solr(p, f=None, m=None):
    """
    Query the Solr full-text index and return an intbitset corresponding
    to the result.  Parameters (p,f,m) are usual search unit ones.
    """
    if m and (m == 'a' or m == 'r'): # phrase/regexp query
        if p.startswith('%') and p.endswith('%'):
            p = p[1:-1] # fix for partial phrase
        p = '"' + p + '"'
    return solr_get_bitset(p, CFG_SOLR_URL)

def search_unit_in_bibrec(datetext1, datetext2, type='c'):
    """
    Return hitset of recIDs found that were either created or modified
    (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive.
    Does not pay attention to pattern, collection, anything.  Useful
    to intersect later on with the 'real' query.
    """
    set = HitSet()
    if type.startswith("m"):
        type = "modification_date"
    else:
        type = "creation_date" # by default we are searching for creation dates

    parts = datetext1.split('->')
    if len(parts) > 1 and datetext1 == datetext2:
        datetext1 = parts[0]
        datetext2 = parts[1]

    if datetext1 == datetext2:
        res = run_sql("SELECT id FROM bibrec WHERE %s LIKE %%s" % (type,),
                      (datetext1 + '%',))
    else:
        res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type),
                      (datetext1, datetext2))
    for row in res:
        set += row[0]
    return set

def search_unit_by_times_cited(p):
    """
    Return hitset of recIDs found that are cited P times.
    Usually P looks like '10->23'.
    """
    numstr = '"'+p+'"'
    #this is sort of stupid but since we may need to
    #get the records that do _not_ have cites, we have to
    #know the ids of all records, too
    #but this is needed only if bsu_p is 0, "0", or a span
    #starting or ending with 0, e.g. "0->23"
    allrecs = []
    if p == 0 or p == "0" or \
       p.startswith("0->") or p.endswith("->0"):
        allrecs = HitSet(run_sql("SELECT id FROM bibrec"))
    return get_records_with_num_cites(numstr, allrecs)

def search_unit_refersto(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records referred to by these records.
    """
    if query:
        ahitset = search_pattern(p=query)
        if ahitset:
            return get_refersto_hitset(ahitset)
        else:
            return HitSet([])
    else:
        return HitSet([])

def search_unit_citedby(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records cited by these records.
    """
    if query:
        ahitset = search_pattern(p=query)
        if ahitset:
            return get_citedby_hitset(ahitset)
        else:
            return HitSet([])
    else:
        return HitSet([])

def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
    """Return dict of hitsets given by intersection of hitset with the collection universes."""
    _ = gettext_set_language(ln)

    # search stage 4: intersect with the collection universe:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    results = {}
    results_nbhits = 0
    for coll in colls:
        results[coll] = hitset_in_any_collection & get_collection_reclist(coll)
        results_nbhits += len(results[coll])

    if results_nbhits == 0:
        # no hits found, try to search in Home:
        results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME)
        if len(results_in_Home) > 0:
            # some hits found in Home, so propose this search:
            if of.startswith("h") and display_nearest_terms_box:
                url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                print_warning(req, _("No match found in collection %(x_collection)s.
Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\ {'x_collection': '<em>' + string.join([get_coll_i18nname(coll, ln, False) for coll in colls], ', ') + '</em>', 'x_url_open': '<a class="nearestterms" href="%s">' % (url), 'x_nb_hits': len(results_in_Home), 'x_url_close': '</a>'}) results = {} else: # no hits found in Home, recommend different search terms: if of.startswith("h") and display_nearest_terms_box: print_warning(req, _("No public collection matched your query. " "If you were looking for a non-public document, please choose " "the desired restricted collection first.")) results = {} if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits) print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1)) return results def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"): """Return intersection of search 'results' (a dict of hitsets with collection as key) with the 'hitset', i.e. apply 'hitset' intersection to each collection within search 'results'. If the final 'results' set is to be empty, and 'ap' (approximate pattern) is true, and then print the `warningtext' and return the original 'results' set unchanged. If 'ap' is false, then return empty results set. """ if ap: results_ap = copy.deepcopy(results) else: results_ap = {} # will return empty dict in case of no hits found nb_total = 0 for coll in results.keys(): results[coll].intersection_update(hitset) nb_total += len(results[coll]) if nb_total == 0: if of.startswith("h"): print_warning(req, aptext) results = results_ap return results def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG): """Return a box similar to ``Not satisfied...'' one by proposing author searches for similar names. Namely, take AUTHOR_NAME and the first initial of the firstame (after comma) and look into author index whether authors with e.g. middle names exist. Useful mainly for CERN Library that sometimes contains name forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the same person. The box isn't proposed if no similarly named authors are found to exist. 
""" # return nothing if not configured: if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0: return "" # return empty box if there is no initial: if re.match(r'[^ ,]+, [^ ]', author_name) is None: return "" # firstly find name comma initial: author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name) # secondly search for similar name forms: similar_author_names = {} for name in author_name_to_search, strip_accents(author_name_to_search): for tag in get_field_tags("author"): # deduce into which bibxxx table we will search: digit1, digit2 = int(tag[0]), int(tag[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(tag) != 6 or tag[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx, (name + "%", tag + "%")) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx, (name + "%", tag)) for row in res: similar_author_names[row[0]] = 1 # remove the original name and sort the list: try: del similar_author_names[author_name] except KeyError: pass # thirdly print the box: out = "" if similar_author_names: out_authors = similar_author_names.keys() out_authors.sort() tmp_authors = [] for out_author in out_authors: nbhits = get_nbhits_in_bibxxx(out_author, "author") if nbhits: tmp_authors.append((out_author, nbhits)) out += websearch_templates.tmpl_similar_author_names( authors=tmp_authors, ln=ln) return out def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True): """Return text box containing list of 'n' nearest terms above/below 'p' for the field 'f' for matching type 't' (words/phrases) in language 'ln'. Propose new searches according to `urlargs' with the new words. If `intro_text_p' is true, then display the introductory message, otherwise print only the nearest terms in the box content. """ # load the right message language _ = gettext_set_language(ln) out = "" nearest_terms = [] if not p: # sanity check p = "." 
if p.startswith('%') and p.endswith('%'): p = p[1:-1] # fix for partial phrase index_id = get_index_id_from_field(f) if f == 'fulltext': if CFG_SOLR_URL: return _("No match found, please enter different search terms.") else: # FIXME: workaround for not having native phrase index yet t = 'w' # special indexes: if f == 'refersto': return _("There are no records referring to %s.") % cgi.escape(p) if f == 'citedby': return _("There are no records cited by %s.") % cgi.escape(p) # look for nearest terms: if t == 'w': nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n) if not nearest_terms: return _("No word index is available for %s.") % \ ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>') else: nearest_terms = [] if index_id: nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n) if f == 'datecreated' or f == 'datemodified': nearest_terms = get_nearest_terms_in_bibrec(p, f, n, n) if not nearest_terms: nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n) if not nearest_terms: return _("No phrase index is available for %s.") % \ ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>') terminfo = [] for term in nearest_terms: if t == 'w': hits = get_nbhits_in_bibwords(term, f) else: if index_id: hits = get_nbhits_in_idxphrases(term, f) elif f == 'datecreated' or f == 'datemodified': hits = get_nbhits_in_bibrec(term, f) else: hits = get_nbhits_in_bibxxx(term, f) argd = {} argd.update(urlargd) # check which fields contained the requested parameter, and replace it. for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'): if px in argd: argd_px = argd[px] if t == 'w': # p was stripped of accents, to do the same: argd_px = strip_accents(argd_px) if f == argd[fx] or f == "anyfield" or f == "": if string.find(argd_px, p) > -1: argd[px] = string.replace(argd_px, p, term) break else: if string.find(argd_px, f+':'+p) > -1: if string.find(term.strip(), ' ') > -1: term = '"' + term + '"' argd[px] = string.replace(argd_px, f+':'+p, f+':'+term) break elif string.find(argd_px, f+':"'+p+'"') > -1: argd[px] = string.replace(argd_px, f+':"'+p+'"', f+':"'+term+'"') break elif string.find(argd_px, f+':\''+p+'\'') > -1: argd[px] = string.replace(argd_px, f+':\''+p+'\'', f+':\''+term+'\'') break terminfo.append((term, hits, argd)) intro = "" if intro_text_p: # add full leading introductory text if f: intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \ {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>", 'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"} else: intro = _("Search term %s did not match any record. 
Nearest terms in any collection are:") % \ ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>") return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo, intro=intro) def get_nearest_terms_in_bibwords(p, f, n_below, n_above): """Return list of +n -n nearest terms to word `p' in index for field `f'.""" nearest_words = [] # will hold the (sorted) list of nearest words to return # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return nearest_words # firstly try to get `n' closest words above `p': res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX, (p, n_above)) for row in res: nearest_words.append(row[0]) nearest_words.reverse() # secondly insert given word `p': nearest_words.append(p) # finally try to get `n' closest words below `p': res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX, (p, n_below)) for row in res: nearest_words.append(row[0]) return nearest_words def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field idxPHRASE table, regardless of collection. Return list of [phrase1, phrase2, ... , phrase_n].""" if CFG_INSPIRE_SITE and index_id in (3, 15): # FIXME: workaround due to new fuzzy index return [p,] idxphraseX = "idxPHRASE%02dF" % index_id res_above = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above)) res_above = map(lambda x: x[0], res_above) res_above.reverse() res_below = run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below)) res_below = map(lambda x: x[0], res_below) return res_above + res_below def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field idxPHRASE table, considering the collection (HitSet). Return list of [(phrase1, hitset), (phrase2, hitset), ... , (phrase_n, hitset)].""" idxphraseX = "idxPHRASE%02dF" % index_id res_above = run_sql("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above * 3)) res_above = [(term, HitSet(hitlist) & collection) for term, hitlist in res_above] res_above = [(term, len(hitlist)) for term, hitlist in res_above if hitlist] res_below = run_sql("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below * 3)) res_below = [(term, HitSet(hitlist) & collection) for term, hitlist in res_below] res_below = [(term, len(hitlist)) for term, hitlist in res_below if hitlist] res_above.reverse() return res_above[-n_above:] + res_below[:n_below] def get_nearest_terms_in_bibxxx(p, f, n_below, n_above): """Browse (-n_above, +n_below) closest bibliographic phrases for the given pattern p in the given field f, regardless of collection. Return list of [phrase1, phrase2, ... , phrase_n].""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? 
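# (illustrative values: p = 'author:ellis' would be split below into f = 'author' and p = 'ellis')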
f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nearest_terms_in_bibwords(p, f, n_below, n_above) ## We are going to take max(n_below, n_above) as the number of ## values to fetch from bibXXx. This is needed to work around ## MySQL UTF-8 sorting troubles in 4.0.x. Proper solution is to ## use MySQL 4.1.x or our own idxPHRASE in the future. index_id = get_index_id_from_field(f) if index_id: return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above) n_fetch = 2*max(n_below, n_above) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts with two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) ## start browsing to fetch list of hits: browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique) # always add self to the results set: browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1 for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) # firstly try to get `n' closest phrases above `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag LIKE %%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value<%%s AND bx.tag=%%s ORDER BY bx.value DESC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # secondly try to get `n' closest phrases equal to or below `p': if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag LIKE %%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t + "%", n_fetch)) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value>=%%s AND bx.tag=%%s ORDER BY bx.value ASC LIMIT %%s""" % bx, (p, t, n_fetch)) for row in res: browsed_phrases[row[0]] = 1 # select the first n words only (this is needed as we were searching # in many different tables, so we are not sure we got the right n # words; this of course won't be needed when we shall have # one ACC table only for a given field): phrases_out = browsed_phrases.keys() phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)), string.lower(strip_accents(y)))) # find position of self: try: idx_p = phrases_out.index(p) except ValueError: idx_p = len(phrases_out)/2 # return n_above and n_below: return phrases_out[max(0, idx_p-n_above):idx_p+n_below] def get_nearest_terms_in_bibrec(p, f, n_below, n_above): """Return list of nearest terms from the bibrec table. p is usually a date, and f is either datecreated or datemodified. Note: the below/above counts are approximate and not strictly respected.
""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res_above = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s < %%s ORDER BY %s DESC LIMIT %%s""" % (col, col, col), (p, n_above)) res_below = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s > %%s ORDER BY %s ASC LIMIT %%s""" % (col, col, col), (p, n_below)) out = set([]) for row in res_above: out.add(row[0]) for row in res_below: out.add(row[0]) out_list = list(out) out_list.sort() return out_list def get_nbhits_in_bibrec(term, f): """Return number of hits in the bibrec table. term is usually a date, and f is either 'datecreated' or 'datemodified'.""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res = run_sql("SELECT COUNT(*) FROM bibrec WHERE %s LIKE %%s" % (col,), (term + '%',)) return res[0][0] def get_nbhits_in_bibwords(word, f): """Return number of hits for word 'word' inside the words index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX, (word,)) for hitlist in res: out += len(HitSet(hitlist[0])) return out def get_nbhits_in_idxphrases(word, f): """Return number of hits for phrase 'word' inside the phrase index for field 'f'.""" out = 0 # deduce into which idxPHRASEXX table we will search: idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: idxphraseX = "idxPHRASE%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % idxphraseX, (word,)) for hitlist in res: out += len(HitSet(hitlist[0])) return out def get_nbhits_in_bibxxx(p, f): """Return number of hits for phrase 'p' inside the bibxxx tables for field 'f'.""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nbhits_in_bibwords(p, f) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts with two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) # start searching: recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., } (unique recIDs, therefore) for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag LIKE %%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t + "%")) else: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag=%%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t)) for row in res: recIDs[row[0]] = 1 return len(recIDs) def get_mysql_recid_from_aleph_sysno(sysno): """Returns DB's recID for the ALEPH sysno passed in the argument (e.g. "002379334CER").
Returns None in case of failure.""" out = None res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""", (sysno,)) if res: out = res[0][0] return out def guess_primary_collection_of_a_record(recID): """Return the primary collection name a record recid belongs to, by testing the 980 identifier. May lead to bad guesses when a collection is defined dynamically via dbquery. In that case, return 'CFG_SITE_NAME'.""" out = CFG_SITE_NAME dbcollids = get_fieldvalues(recID, "980__a") if dbcollids: for dbcollid in dbcollids: dbquery = "collection:" + dbcollid res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,)) if res: out = res[0][0] break if CFG_CERN_SITE: # dirty hack for ATLAS collections at CERN: if out in ('ATLAS Communications', 'ATLAS Internal Notes'): for alternative_collection in ('ATLAS Communications Physics', 'ATLAS Communications General', 'ATLAS Internal Notes Physics', 'ATLAS Internal Notes General',): if recID in get_collection_reclist(alternative_collection): out = alternative_collection break return out _re_collection_url = re.compile('/collection/(.+)') def guess_collection_of_a_record(recID, referer=None, recreate_cache_if_needed=True): """Return the collection name a record recid belongs to, by first testing the referer URL if provided and otherwise returning the primary collection.""" if referer: dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer) # requests can come from different invenio installations, with different collections if CFG_SITE_URL.find(hostname) < 0: return guess_primary_collection_of_a_record(recID) g = _re_collection_url.match(path) if g: name = urllib.unquote_plus(g.group(1)) # check if this collection actually exists (also normalize the name if case-insensitive) name = get_coll_normalised_name(name) if name and recID in get_collection_reclist(name): return name elif path.startswith('/search'): if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() query = cgi.parse_qs(query) for name in query.get('cc', []) + query.get('c', []): name = get_coll_normalised_name(name) if name and recID in get_collection_reclist(name, recreate_cache_if_needed=False): return name return guess_primary_collection_of_a_record(recID) def is_record_in_any_collection(recID, recreate_cache_if_needed=True): """Return True if the record belongs to at least one collection. This is a good, although not perfect, indicator to guess if webcoll has already run after this record has been entered into the system. """ if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() for name in collection_reclist_cache.cache.keys(): if recID in get_collection_reclist(name, recreate_cache_if_needed=False): return True return False def get_all_collections_of_a_record(recID, recreate_cache_if_needed=True): """Return all the collection names a record belongs to. Note this function is O(n_collections).""" ret = [] if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() for name in collection_reclist_cache.cache.keys(): if recID in get_collection_reclist(name, recreate_cache_if_needed=False): ret.append(name) return ret def get_tag_name(tag_value, prolog="", epilog=""): """Return tag name from the known tag value, by looking up the 'tag' table. Return empty string in case of failure.
Example: input='100__%', output='first author'.""" out = "" res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,)) if res: out = prolog + res[0][0] + epilog return out def get_fieldcodes(): """Returns a list of field codes that may have been passed as 'search options' in URL. Example: output=['subject','division'].""" out = [] res = run_sql("SELECT DISTINCT(code) FROM field") for row in res: out.append(row[0]) return out def get_field_name(code): """Return the corresponding field_name given the field code. e.g. reportnumber -> report number.""" res = run_sql("SELECT name FROM field WHERE code=%s", (code, )) if res: return res[0][0] else: return "" def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (field, )) for val in res: out.append(val[0]) return out - -def get_fieldvalues(recIDs, tag, repetitive_values=True): - """ - Return list of field values for field TAG for the given record ID - or list of record IDs. (RECIDS can be both an integer or a list - of integers.) - - If REPETITIVE_VALUES is set to True, then return all values even - if they are doubled. If set to False, then return unique values - only. - """ - out = [] - if isinstance(recIDs, (int, long)): - recIDs =[recIDs,] - if not isinstance(recIDs, (list, tuple)): - return [] - if len(recIDs) == 0: - return [] - if tag == "001___": - # we have asked for tag 001 (=recID) that is not stored in bibXXx tables - out = [str(recID) for recID in recIDs] - else: - # we are going to look inside bibXXx tables - digits = tag[0:2] - try: - intdigits = int(digits) - if intdigits < 0 or intdigits > 99: - raise ValueError - except ValueError: - # invalid tag value asked for - return [] - bx = "bib%sx" % digits - bibx = "bibrec_bib%sx" % digits - queryparam = [] - for recID in recIDs: - queryparam.append(recID) - if not repetitive_values: - queryselect = "DISTINCT(bx.value)" - else: - queryselect = "bx.value" - query = "SELECT %s FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec IN (%s) " \ - " AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \ - " ORDER BY bibx.field_number, bx.tag ASC" % \ - (queryselect, bx, bibx, ("%s,"*len(queryparam))[:-1]) - res = run_sql(query, tuple(queryparam) + (tag,)) - for row in res: - out.append(row[0]) - return out - def get_fieldvalues_alephseq_like(recID, tags_in, can_see_hidden=False): """Return buffer of ALEPH sequential-like textual format with fields found in the list TAGS_IN for record RECID. If can_see_hidden is True, just print everything. Otherwise hide fields from CFG_BIBFORMAT_HIDDEN_TAGS.
""" out = "" if type(tags_in) is not list: tags_in = [tags_in,] if len(tags_in) == 1 and len(tags_in[0]) == 6: ## case A: one concrete subfield asked, so print its value if found ## (use with care: can mislead if field has multiple occurrences) out += string.join(get_fieldvalues(recID, tags_in[0]),"\n") else: ## case B: print our "text MARC" format; works safely all the time # find out which tags to output: dict_of_tags_out = {} if not tags_in: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 else: for tag in tags_in: if len(tag) == 0: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 elif len(tag) == 1: for j in range(0, 10): dict_of_tags_out["%s%d%%" % (tag, j)] = 1 elif len(tag) < 5: dict_of_tags_out["%s%%" % tag] = 1 else: # tag has length 5 or more, so use its first five characters dict_of_tags_out[tag[0:5]] = 1 tags_out = dict_of_tags_out.keys() tags_out.sort() # search all bibXXx tables as needed: for tag in tags_out: digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for continue if tag.startswith("001") or tag.startswith("00%"): if out: out += "\n" out += "%09d %s %d" % (recID, "001__", recID) bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+'%')) # go through fields: field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] printme = True # check whether the field is among the hidden ones: if not can_see_hidden: for htag in CFG_BIBFORMAT_HIDDEN_TAGS: ltag = len(htag) samelenfield = field[0:ltag] if samelenfield == htag: printme = False if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if printme: if field_number != field_number_old or field[:-1] != field_old[:-1]: if out: out += "\n" out += "%09d %s " % (recID, field[:5]) field_number_old = field_number field_old = field # print subfield value if field[0:2] == "00" and field[-1:] == "_": out += value else: out += "$$%s%s" % (field[-1:], value) return out def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted. """ out = 0 res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1) if res: try: # if recid is '123foo', mysql will return id=123, and we don't want that recID = int(recID) except ValueError: return 0 # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out def record_empty(recID): """ Is this record empty, e.g. has only 001, waiting for integration? @param recID: the record identifier. @type recID: int @return: 1 if the record is empty, 0 otherwise. @rtype: int """ record = get_record(recID) if record is None or len(record) < 2: return 1 else: return 0 def record_public_p(recID, recreate_cache_if_needed=True): """Return True if the record is public, i.e. if it can be found in the Home collection; False otherwise.
""" return recID in get_collection_reclist(CFG_SITE_NAME, recreate_cache_if_needed=recreate_cache_if_needed) def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def print_warning(req, msg, msg_type='', prologue='<br />', epilogue='<br />'): "Prints warning message and flushes output." if req and msg: req.write(websearch_templates.tmpl_print_warning( msg = msg, type = msg_type, prologue = prologue, epilogue = epilogue, )) return def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints a stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page.""" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints a stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links.
This is suitable for displaying navigation links at the bottom of the search results page.""" out = "" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_hosted_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_results_overview(colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below.""" out = "" new_colls = [] for coll in colls: new_colls.append({ 'id': get_colID(coll), 'code': coll, 'name': get_coll_i18nname(coll, ln, False), }) return websearch_templates.tmpl_print_results_overview( ln = ln, results_final_nb_total = results_final_nb_total, results_final_nb = results_final_nb, cpu_time = cpu_time, colls = new_colls, ec = ec, hosted_colls_potential_results_p = hosted_colls_potential_results_p, ) def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Prints the full results of a hosted collection.""" if of.startswith("h"): if no_records_found: return "<br />No results found." if search_timed_out: return "<br />The search engine did not respond in time." return websearch_templates.tmpl_print_hosted_results( url_and_engine=url_and_engine, ln=ln, of=of, req=req, limit=limit ) def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG): """Sort records in 'recIDs' list according to sort field 'sort_field' in order 'sort_order'. If more than one instance of 'sort_field' is found for a given record, try to choose the one that is given by 'sort_pattern', for example "sort by the report number that starts with CERN-PS". Note that 'sort_field' can be a field code like 'author' or a MARC tag like '100__a' directly.""" _ = gettext_set_language(ln) ## check arguments: if not sort_field: return recIDs if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT: if of.startswith('h'): print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning") return recIDs sort_fields = string.split(sort_field, ",") recIDs_dict = {} recIDs_out = [] ## first deduce sorting MARC tag out of the 'sort_field' argument: tags = [] for sort_field in sort_fields: if sort_field and str(sort_field[0:2]).isdigit(): # sort_field starts with two digits, so this is probably a MARC tag already tags.append(sort_field) else: # let us check the 'field' table query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (sort_field, )) if res: for row in res: tags.append(row[0]) else: if of.startswith('h'): print_warning(req, _("Sorry, %s does not seem to be a valid sort option.
Choosing title sort instead.") % cgi.escape(sort_field), "Error") tags.append("245__a") if verbose >= 3: print_warning(req, "Sorting by tags %s." % cgi.escape(repr(tags))) if sort_pattern: print_warning(req, "Sorting preferentially by %s." % cgi.escape(sort_pattern)) ## check if we have sorting tag defined: if tags: # fetch the necessary field values: for recID in recIDs: val = "" # will hold value for recID according to which sort vals = [] # will hold all values found in sorting tag for recID for tag in tags: if CFG_CERN_SITE and tag == '773__c': # CERN hack: journal sorting # 773__c contains page numbers, e.g. 3-13, and we want to sort by 3, and numerically: vals.extend(["%050s" % x.split("-",1)[0] for x in get_fieldvalues(recID, tag)]) else: vals.extend(get_fieldvalues(recID, tag)) if sort_pattern: # try to pick that tag value that corresponds to sort pattern bingo = 0 for v in vals: if v.lower().startswith(sort_pattern.lower()): # bingo! bingo = 1 val = v break if not bingo: # sort_pattern not present, so add other vals after spaces val = sort_pattern + " " + string.join(vals) else: # no sort pattern defined, so join them all together val = string.join(vals) val = strip_accents(val.lower()) # sort values regardless of accents and case if recIDs_dict.has_key(val): recIDs_dict[val].append(recID) else: recIDs_dict[val] = [recID] # sort them: recIDs_dict_keys = recIDs_dict.keys() recIDs_dict_keys.sort() # now that keys are sorted, create output array: for k in recIDs_dict_keys: for s in recIDs_dict[k]: recIDs_out.append(s) # ascending or descending? if sort_order == 'a': recIDs_out.reverse() # okay, we are done return recIDs_out else: # good, no sort needed return recIDs def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab='', sf='', so='d', sp='', rm=''): """ Prints list of records 'recIDs' formatted according to 'format' in groups of 'rg' starting from 'jrec'. Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head. A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. Print prologue and/or epilogue specific to 'format' if 'print_records_prologue_p' and/or print_records_epilogue_p' are True. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. 
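Worked example of the windowing logic in the body below (illustrative numbers): with nb_found = len(recIDs) = 100, jrec = 11 and rg = 10, we get irec_max = 89 and irec_min = 79, so recIDs[89] down to recIDs[80] are printed, i.e. hits 11 to 20 counted from the head of the reverse-sorted result list.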
""" # load the right message language _ = gettext_set_language(ln) # sanity checking: if req is None: return # get user_info (for formatting based on user) if isinstance(req, cStringIO.OutputType): user_info = {} else: user_info = collect_user_info(req) if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # print header if needed if print_records_prologue_p: print_records_prologue(req, format) # print records recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] format_records(recIDs_to_print, format, ln=ln, search_pattern=search_pattern, record_separator="\n", user_info=user_info, req=req) # print footer if needed if print_records_epilogue_p: print_records_epilogue(req, format) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max, irec_min, -1): x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(x) if x: req.write('\n') elif format == 'excel': recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot) else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) elif format.startswith("hb"): # HTML brief format: display_add_to_basket = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_add_to_basket = False else: if not user_info['precached_usebaskets']: display_add_to_basket = False req.write(websearch_templates.tmpl_record_format_htmlbrief_header( ln = ln)) for irec in range(irec_max, irec_min, -1): row_number = jrec+irec_max-irec recid = recIDs[irec] if relevances and relevances[irec]: relevance = relevances[irec] else: relevance = '' record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(websearch_templates.tmpl_record_format_htmlbrief_body( ln = ln, recid = recid, row_number = row_number, relevance = relevance, record = record, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, display_add_to_basket = display_add_to_basket )) req.write(websearch_templates.tmpl_record_format_htmlbrief_footer( ln = ln, display_add_to_basket = display_add_to_basket)) elif format.startswith("hd"): # HTML detailed format: for irec in range(irec_max, irec_min, -1): if record_exists(recIDs[irec]) == -1: print_warning(req, _("The record has been deleted.")) continue unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])), recIDs[irec], ln=ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % 
ln recid = recIDs[irec] recid_to_display = recid # Record ID used to build the URL. if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: try: recid_to_display = get_fieldvalues(recid, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0] except IndexError: # No external sysno is available, keep using # internal recid. pass citedbynum = 0 #num of citations, to be shown in the cit tab references = -1 #num of references if CFG_BIBRANK_SHOW_CITATION_LINKS: citedbynum = get_cited_by_count(recid) if not CFG_CERN_SITE:#FIXME:should be replaced by something like CFG_SHOW_REFERENCES reftag = "" reftags = get_field_tags("reference") if reftags: reftag = reftags[0] tmprec = get_record(recid) if reftag and len(reftag) > 4: references = len(record_get_field_instances(tmprec, reftag[0:3], reftag[3], reftag[4])) tabs = [(unordered_tabs[tab_id]['label'], \ '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid_to_display, tab_id, link_ln), \ tab_id == tab, unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] # load content if tab == 'usage': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references)) r = calculate_reading_similarity_list(recIDs[irec], "downloads") downloadsimilarity = None downloadhistory = None #if r: # downloadsimilarity = r if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln) r = calculate_reading_similarity_list(recIDs[irec], "pageviews") viewsimilarity = None if r: viewsimilarity = r content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec], ln, downloadsimilarity=downloadsimilarity, downloadhistory=downloadhistory, viewsimilarity=viewsimilarity) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'citations': recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln, citationnum=citedbynum, referencenum=references)) req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln)) # Citing citinglist = calculate_cited_by_list(recid) req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid, ln, citinglist, sf=sf, so=so, sp=sp, rm=rm)) # Self-cited selfcited = get_self_cited_by(recid) req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid, ln, selfcited=selfcited, citinglist=citinglist)) # Co-cited s = calculate_co_cited_with_list(recid) cociting = None if s: cociting = s req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid, ln, cociting=cociting)) # Citation history, if needed citationhistory = None if citinglist: citationhistory = create_citation_history_graph_and_box(recid, ln) #debug if verbose > 3: print_warning(req, "Citation graph debug: " + \ str(len(citationhistory))) req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory)) req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln)) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'references': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references)) req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'keywords': import 
bibclassify_webinterface recid = recIDs[irec] bibclassify_webinterface.main_page(req, recid, tabs, ln, webstyle_templates) else: # Metadata tab req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, show_short_rec_p=False, citationnum=citedbynum, referencenum=references)) creationdate = None modificationdate = None if record_exists(recIDs[irec]) == 1: creationdate = get_creation_date(recIDs[irec]) modificationdate = get_modification_date(recIDs[irec]) content = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) content = websearch_templates.tmpl_detailed_record_metadata( recID = recIDs[irec], ln = ln, format = format, creationdate = creationdate, modificationdate = modificationdate, content = content) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln, creationdate=creationdate, modificationdate=modificationdate, show_short_rec_p=False)) if len(tabs) > 0: # Add the mini box at bottom of the page if CFG_WEBCOMMENT_ALLOW_REVIEWS: from invenio.webcomment import get_mini_reviews reviews = get_mini_reviews(recid = recIDs[irec], ln=ln) else: reviews = '' actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose) files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose) req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec], ln, format, files=files, reviews=reviews, actions=actions)) else: # Other formats for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) else: print_warning(req, _("Use different search terms.")) def print_records_prologue(req, format, cc=None): """ Print the appropriate prologue for list of records in the given format. """ prologue = "" # no prologue needed for HTML or Text formats if format.startswith('xm'): prologue = websearch_templates.tmpl_xml_marc_prologue() elif format.startswith('xn'): prologue = websearch_templates.tmpl_xml_nlm_prologue() elif format.startswith('xw'): prologue = websearch_templates.tmpl_xml_refworks_prologue() elif format.startswith('xr'): prologue = websearch_templates.tmpl_xml_rss_prologue(cc=cc) elif format.startswith('xe'): prologue = websearch_templates.tmpl_xml_endnote_prologue() elif format.startswith('xo'): prologue = websearch_templates.tmpl_xml_mods_prologue() elif format.startswith('xp'): prologue = websearch_templates.tmpl_xml_podcast_prologue(cc=cc) elif format.startswith('x'): prologue = websearch_templates.tmpl_xml_default_prologue() req.write(prologue) def print_records_epilogue(req, format): """ Print the appropriate epilogue for list of records in the given format. 
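An illustrative note (assuming the usual template behaviour, which is not verified here): for format='xm' the epilogue written is the output of websearch_templates.tmpl_xml_marc_epilogue(), i.e. the closing tag of the MARCXML collection wrapper opened by the matching prologue.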
""" epilogue = "" # no epilogue needed for HTML or Text formats if format.startswith('xm'): epilogue = websearch_templates.tmpl_xml_marc_epilogue() elif format.startswith('xn'): epilogue = websearch_templates.tmpl_xml_nlm_epilogue() elif format.startswith('xw'): epilogue = websearch_templates.tmpl_xml_refworks_epilogue() elif format.startswith('xr'): epilogue = websearch_templates.tmpl_xml_rss_epilogue() elif format.startswith('xe'): epilogue = websearch_templates.tmpl_xml_endnote_epilogue() elif format.startswith('xo'): epilogue = websearch_templates.tmpl_xml_mods_epilogue() elif format.startswith('xp'): epilogue = websearch_templates.tmpl_xml_podcast_epilogue() elif format.startswith('x'): epilogue = websearch_templates.tmpl_xml_default_epilogue() req.write(epilogue) def get_record(recid): """Directly the record object corresponding to the recid.""" if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: value = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT='recstruct'", (recid, )) if value: try: return deserialize_via_marshal(value[0][0]) except: ### In case of corruption, let's rebuild it! pass return create_record(print_record(recid, 'xm'))[0] def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress, search_pattern=None, user_info=None, verbose=0, sf='', so='d', sp='', rm=''): """ Prints record 'recID' formatted according to 'format'. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. """ if format == 'recstruct': return get_record(recID) _ = gettext_set_language(ln) display_claim_this_paper = False try: display_claim_this_paper = user_info["precached_viewclaimlink"] except (KeyError, TypeError): display_claim_this_paper = False #check from user information if the user has the right to see hidden fields/tags in the #records as well can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # New Python BibFormat procedure for formatting # Old procedure follows further below # We must still check some special formats, but these # should disappear when BibFormat improves. if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \ or format.lower().startswith('t') \ or format.lower().startswith('hm') \ or str(format[0:3]).isdigit() \ or ot): # Unspecified format is hd if format == '': format = 'hd' if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html': # HTML output displays a default value for deleted records. # Other format have to deal with it. 
out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) # at the end of HTML brief mode, print the "Detailed record" functionality: if format.lower().startswith('hb') and \ format.lower() != 'hb_p': out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) return out # Old PHP BibFormat procedure for formatting # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " <record>\n" out += " <header>\n" for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += " <identifier>%s</identifier>\n" % oai_id out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID) out += " </header>\n" out += " <metadata>\n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) elif format.startswith("xm"): out += """ <record>\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" res = run_sql(query, (recID, )) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ <controlfield tag="%s" >%s</controlfield>\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. 
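# (worked illustration of the 'i' trick used just below: the digit1 = 0 pass starts # at digit2 = 1, skipping the bib00x controlfields already printed above; 'i' is # reset to 0 inside the loop, so the digit1 = 1..9 passes scan bib10x..bib99x in full)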
So start at bib01x and # bibrec_bib01x (and set i = 0 at the end of # first loop) for digit1 in range(0, 10): for digit2 in range(i, 10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(digit1)+str(digit2)+'%')) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_" or ind1 == "": ind1 = " " if ind2 == "_" or ind2 == "": ind2 = " " # print field tag, unless hidden printme = True if not can_see_hidden: for htag in CFG_BIBFORMAT_HIDDEN_TAGS: ltag = len(htag) samelenfield = field[0:ltag] if samelenfield == htag: printme = False if printme: if field_number != field_number_old or field[:-1] != field_old[:-1]: if field_number_old != -999: out += """ </datafield>\n""" out += """ <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value value = encode_for_xml(value) out += """ <subfield code="%s">%s</subfield>\n""" % \ (encode_for_xml(field[-1:]), value) # all fields/subfields printed in this run, so close the tag: if field_number_old != -999: out += """ </datafield>\n""" i = 0 # Next loop should start looking at bib00x and bibrec_bib00x # we are at the end of printing the record: out += " </record>\n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ <dc xmlns="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://www.openarchives.org/OAI/1.1/dc.xsd">\n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " <language>%s</language>\n" % f for f in get_fieldvalues(recID, "100__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " <creator>%s</creator>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " <title>%s</title>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " <subject>%s</subject>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): if f.split('.')[-1] == 'png': continue out += " <identifier>%s</identifier>\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " <description>%s</description>\n" % encode_for_xml(f) out += " <date>%s</date>\n" % get_creation_date(recID) out += " </dc>\n" elif len(format) == 6 and str(format[0:3]).isdigit(): # user has asked to print some fields only if format == "001": out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format) else: vals = get_fieldvalues(recID, format) for val in vals: out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format) elif format.startswith('t'): ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) else: out += get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) elif format == "hm": if record_exist_p == -1: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)) + "</pre>" else: out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)) + "</pre>" elif format.startswith("h") and ot: ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) + "</pre>" else: out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) + "</pre>" elif format == "hd": # HTML detailed format if record_exist_p == -1: out += _("The record has been deleted.") else: # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, ) elif format.startswith("hb_") or format.startswith("hd_"): # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hx"): # BibTeX format, called on the fly: if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) elif format.startswith("hs"): # for citation/download similarity navigation links: if record_exist_p == -1: out += _("The record has been deleted.") else: out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln) # firstly, title: titles = get_fieldvalues(recID, "245__a") if titles: for title in titles: out += "<strong>%s</strong>" % title else: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") if titles: for title in titles: out += "<strong>%s</strong>" % title else: # just print record ID: out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln, False), recID) out += "</a>" # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += " - %s" % authors[0] if len(authors) > 1: out += " et al" # thirdly publication info: publinfos = get_fieldvalues(recID, "773__s") if not publinfos: publinfos = get_fieldvalues(recID, "909C4s") if not publinfos: publinfos = get_fieldvalues(recID, "037__a") if not publinfos: publinfos = get_fieldvalues(recID, "088__a") if publinfos: out += " - %s" % publinfos[0] else: # fourthly publication year (if not publication info): years = get_fieldvalues(recID, "773__y") if not years: years = get_fieldvalues(recID, "909C4y") if not years: years = get_fieldvalues(recID, "260__c") if years: out += " (%s)" % years[0] else: # HTML brief format by default if record_exist_p == -1: out += _("The record has been deleted.") else: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format)) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: if CFG_WEBSEARCH_CALL_BIBFORMAT: out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) if out_record_in_format: out += out_record_in_format else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, ) # at the end of HTML brief mode, print the "Detailed record" functionality: if
format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += " \n" return out def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0): """ Calls BibFormat and returns formatted record. BibFormat will decide by itself if old or new BibFormat must be used. """ from invenio.bibformat_utils import get_pdf_snippets keywords = [] if search_pattern is not None: units = create_basic_search_units(None, str(search_pattern), None) keywords = [unit[1] for unit in units if (unit[0] != '-' and unit[2] in [None, 'fulltext'])] out = format_record(recID, of=format, ln=ln, search_pattern=keywords, user_info=user_info, verbose=verbose) if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and user_info and \ 'fulltext' in user_info['uri']: # check snippets only if URL contains fulltext # FIXME: make it work for CLI too, via new function arg if keywords: snippets = get_pdf_snippets(recID, keywords) if snippets: out += snippets return out def log_query(hostname, query_args, uid=-1): """ Log query into the query and user_query tables. Return id_query or None in case of problems. """ id_query = None if uid >= 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return id_query def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(CFG_LOGDIR + "/search.log", "a") log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return ### CALLABLES def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, sf="", so="d", sp="", rm="", of="id", ot="", aas=0, p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab="", wl=CFG_WEBSEARCH_WILDCARD_LIMIT): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collection list (e.g. ["Theses", "Books"]). The collections user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. "author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title"). 
so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer rm - ranking method (e.g. "jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. (Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. aas - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). Governs whether we want to present the results in a single huge list, or splitted by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to Invenio from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recid, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1 - first datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-08-23 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd1' takes precedence over d1y, d1m, d1d if these are defined. d1y - first date's year (e.g. "1998"). Useful for search limits on creation/modification date. d1m - first date's month (e.g. "08"). 
Useful for search limits on creation/modification date. d1d - first date's day (e.g. "23"). Useful for search limits on creation/modification date. d2 - second datetime in full YYYY-mm-dd HH:MM:SS format (e.g. "1998-09-02 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd2' takes precedence over d2y, d2m, d2d if these are defined. d2y - second date's year (e.g. "1998"). Useful for search limits on creation/modification date. d2m - second date's month (e.g. "09"). Useful for search limits on creation/modification date. d2d - second date's day (e.g. "02"). Useful for search limits on creation/modification date. dt - first and second date's type (e.g. "c"). Specifies whether to search in creation dates ("c") or in modification dates ("m"). When dt is not set and d1* and d2* are set, the default is "c". verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. ec - list of external search engines to search as well (e.g. "SPIRES HEP"). wl - wildcard limit (e.g. 100): wildcard queries will be limited to 100 results. """ selected_external_collections_infos = None # wash output format: of = wash_output_format(of) # raise an exception when trying to print out html from the cli if of.startswith("h"): assert req # for every search engine request asking for an HTML output, we # first regenerate cache of collection and field I18N names if # needed; so that later we won't bother checking timestamps for # I18N names at all: if of.startswith("h"): collection_i18nname_cache.recreate_cache_if_needed() field_i18nname_cache.recreate_cache_if_needed() # wash all arguments requiring special care try: (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display?
except InvenioWebSearchUnknownCollectionError, exc: colname = exc.colname if of.startswith("h"): page_start(req, of, cc, aas, ln, getUid(req), websearch_templates.tmpl_collection_not_found_page_title(colname, ln)) req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln)) return page_end(req, of, ln) elif of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: return page_end(req, of, ln) p = wash_pattern(p) f = wash_field(f) p1 = wash_pattern(p1) f1 = wash_field(f1) p2 = wash_pattern(p2) f2 = wash_field(f2) p3 = wash_pattern(p3) f3 = wash_field(f3) datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) # wash ranking method: if not is_method_valid(None, rm): rm = "" _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO deduce passed search limiting criteria (if applicable) pl, pl_in_url = "", "" # no limits by default if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \ and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) if recid is None: recid = 0 # use recid 0 to indicate that this sysno does not exist # deduce collection we are in (if applicable): if recid > 0: referer = None if req: referer = req.headers_in.get('Referer') cc = guess_collection_of_a_record(recid, referer) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist ## 1 - detailed record display title, description, keywords = \ websearch_templates.tmpl_record_page_header_content(req, recid, ln) if req is not None and not req.header_only: page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab) # Default format is hb but we are in detailed -> change 'of' if of == "hb": of = "hd" if record_exists(recid): if recidb <= recid: # sanity check recidb = recid + 1 if of == "id": return [recidx for recidx in range(recid, recidb) if record_exists(recidx)] else: print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab, sf=sf, so=so, sp=sp, rm=rm) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.remote_ip) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) elif action == "browse": ## 2 - browse needed of = 'hb' page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3))
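# ----------------------------------------------------------------------
# Editorial aside (illustration only, not part of the patch): a minimal,
# self-contained sketch of the backwards-compatibility washing performed
# a few lines above, where the legacy `id'/`idb'/`sysnb' arguments are
# folded into `recid'/`recidb'/`sysno' when the latter are unset.  The
# helper name below is hypothetical, not an Invenio API.
def _wash_legacy_record_args(recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb=""):
    """Fold legacy id/idb/sysnb into recid/recidb/sysno (sketch)."""
    if sysnb != "" and sysno == "":
        sysno = sysnb
    if id > 0 and recid == -1:
        recid = id
    if idb > 0 and recidb == -1:
        recidb = idb
    return recid, recidb, sysno
# e.g. _wash_legacy_record_args(id=20000) returns (20000, -1, "")
# ----------------------------------------------------------------------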
req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) try: if aas == 1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg, ln) browse_pattern(req, colls_to_search, p2, f2, rg, ln) browse_pattern(req, colls_to_search, p3, f3, rg, ln) else: browse_pattern(req, colls_to_search, p, f, rg, ln) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search (or old-style citation search) needed if req and not req.header_only: page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of=="id": return results_similar_recIDs elif of.startswith("x"): print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif p.startswith("cocitedwith:"): #WAS EXPERIMENTAL ## 3-terter - cited by search needed page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, 
dt, jrec, ec, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record does exist, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of == "id": return results_cocited_recIDs elif of.startswith("x"): print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: ## 3 - common search needed query_in_cache = False query_representation_in_cache = repr((p,f,colls_to_search, wl)) page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h") and verbose and wash_colls_debug: print_warning(req, "wash_colls debugging info : %s" % wash_colls_debug) # search into the hosted collections only if the output format is html or xml if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"): # hosted_colls_results : the hosted collections' searches that did not timeout # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again (hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH) # successful searches if hosted_colls_results: hosted_colls_true_results = [] for result in hosted_colls_results: # if the number of results is None or 0 (or False) then just do nothing if result[1] == None or result[1] == False: # these are the searches that returned no or zero results if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned no results" % result[0][1].name) else: # these are the searches that actually returned results on time hosted_colls_true_results.append(result) if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time") if hosted_colls_timeouts: if verbose: for timeout in hosted_colls_timeouts: print_warning(req, "Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name) # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")):
(hosted_colls_results, hosted_colls_timeouts) = (None, None) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections to be searched") ## let's define some useful boolean variables: # True means there are actual or potential hosted collections results to be printed hosted_colls_actual_or_potential_results_p = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts)) # True means there are hosted collections timeouts to take care of later # (useful for more accurate printing of results later) hosted_colls_potential_results_p = not (not hosted_colls or not hosted_colls_timeouts) # True means we only have hosted collections to deal with only_hosted_colls_actual_or_potential_results_p = not colls_to_search and hosted_colls_actual_or_potential_results_p if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) t1 = os.times()[4] results_in_any_collection = HitSet() if aas == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p2: results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op1 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op1 == "o": # or results_in_any_collection.union_update(results_tmp) elif op1 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op1), "Error") if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p3: results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op2 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op2 == "o": # or results_in_any_collection.union_update(results_tmp) elif op2 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." 
% cgi.escape(op2), "Error") except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: ## 3B - simple search if search_results_cache.cache.has_key(query_representation_in_cache): # query is not in the cache already, so reuse it: query_in_cache = True results_in_any_collection = search_results_cache.cache[query_representation_in_cache] if verbose and of.startswith("h"): print_warning(req, "Search stage 0: query found in cache, reusing cached results.") else: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there are results only in the hosted collections. Also added the if clause to avoid # searching in case we know we only have actual or potential hosted collections results if not only_hosted_colls_actual_or_potential_results_p: results_in_any_collection = search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p, wl=wl) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if len(results_in_any_collection) == 0 and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # store this search query results into search results cache if needed: if CFG_WEBSEARCH_SEARCH_CACHE_SIZE and not query_in_cache: if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE: search_results_cache.clear() search_results_cache.cache[query_representation_in_cache] = results_in_any_collection if verbose and of.startswith("h"): print_warning(req, "Search stage 3: storing query results in cache.") # search stage 4: intersection with collection universe: try: # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection" # recommendations when there results only in the hosted collections. 
Also added the if clause to avoid # searching in case we know since the last stage that we have no results in any collection if len(results_in_any_collection) != 0: results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p) else: results_final = {} except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) # search stage 5: apply search option limits and restrictions: if datetext1 != "" and results_final != {}: if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2)) try: results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(datetext1, datetext2, dt), ap, aptext= _("No match within your time limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) #if of.startswith("x"): # # Print empty, but valid XML # print_records_prologue(req, of) # print_records_epilogue(req, of) return page_end(req, of, ln) if pl and results_final != {}: pl = wash_pattern(pl) if verbose and of.startswith("h"): print_warning(req, "Search stage 5: applying search pattern limit %s..." 
% cgi.escape(pl)) try: results_final = intersect_results_with_hitset(req, results_final, search_pattern_parenthesised(req, pl, ap=0, ln=ln, wl=wl), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) if results_final == {} and not hosted_colls_actual_or_potential_results_p: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) if of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) t2 = os.times()[4] cpu_time = t2 - t1 ## search stage 6: display results: results_final_nb_total = 0 results_final_nb = {} # will hold number of records found in each collection # (in simple dict to display overview more easily) for coll in results_final.keys(): results_final_nb[coll] = len(results_final[coll]) #results_final_nb_total += results_final_nb[coll] # Now let us calculate results_final_nb_total more precisely, # in order to get the total number of "distinct" hits across # searched collections; this is useful because a record might # have been attributed to more than one primary collection; so # we have to avoid counting it multiple times. The price to # pay for this accuracy of results_final_nb_total is somewhat # increased CPU time. if len(results_final.keys()) == 1: # only one collection; no need to union them results_final_for_all_selected_colls = results_final.values()[0] results_final_nb_total = results_final_nb.values()[0] else: # okay, some work ahead to union hits across collections: results_final_for_all_selected_colls = HitSet() for coll in results_final.keys(): results_final_for_all_selected_colls.union_update(results_final[coll]) results_final_nb_total = len(results_final_for_all_selected_colls) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: for result in hosted_colls_true_results: colls_to_search.append(result[0][1].name) results_final_nb[result[0][1].name] = result[1] results_final_nb_total += result[1] cpu_time += result[2] if hosted_colls_timeouts: for timeout in hosted_colls_timeouts: colls_to_search.append(timeout[1].name) # use -963 as a special number to identify the collections that timed out results_final_nb[timeout[1].name] = -963 # we continue past this point only if there is a hosted collection that has timed out and might offer potential results if results_final_nb_total == 0 and not hosted_colls_potential_results_p: if of.startswith("h"): print_warning(req, "No match found, please enter different search terms.") elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # yes, some hits found: good! # collection list may have changed due to not-exact-match-found policy so check it out: for coll in results_final.keys(): if coll not in colls_to_search: colls_to_search.append(coll) # print results overview: if of == "id": # we have been asked to return list of recIDs recIDs = list(results_final_for_all_selected_colls) if sf: # do we have to sort? recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank?
results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if results_final_for_all_colls_rank_records_output[0]: recIDs = results_final_for_all_colls_rank_records_output[0] return recIDs elif of.startswith("h"): if of not in ['hcs']: # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately req.write(print_results_overview(colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec, hosted_colls_potential_results_p=hosted_colls_potential_results_p)) selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln) # print number of hits found for XML outputs: if of.startswith("x"): req.write("<!-- Search-Engine-Total-Number-Of-Results: %s -->\n" % results_final_nb_total) # print records: if of in ['hcs']: # feed the current search to be summarized: from invenio.search_engine_summarizer import summarize_records search_p = p search_f = f if not p and (aas == 1 or p1 or p2 or p3): op_d = {'n': ' and not ', 'a': ' and ', 'o': ' or ', '': ''} triples = ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) triples_len = len(triples) for i in range(triples_len): fi, pi, oi = triples[i] # e.g.: if i < triples_len-1 and not triples[i+1][1]: # if p2 empty triples[i+1][0] = '' # f2 must be too oi = '' # and o1 if ' ' in pi: pi = '"'+pi+'"' if fi: fi = fi + ':' search_p += fi + pi + op_d[oi] search_f = '' summarize_records(results_final_for_all_selected_colls, 'hcs', ln, search_p, search_f, req) else: if len(colls_to_search) > 1: cpu_time = -1 # we do not want to have search time printed on each collection print_records_prologue(req, of, cc=cc) for coll in colls_to_search: if results_final.has_key(coll) and len(results_final[coll]): if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) results_final_recIDs = list(results_final[coll]) results_final_relevances = [] results_final_relevances_prologue = "" results_final_relevances_epilogue = "" if sf: # do we have to sort? results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank?
results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \ rank_records(rm, 0, results_final[coll], string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if of.startswith("h"): print_warning(req, results_final_comments) if results_final_recIDs_ranked: results_final_recIDs = results_final_recIDs_ranked else: # rank_records failed and returned some error message to display: print_warning(req, results_final_relevances_prologue) print_warning(req, results_final_relevances_epilogue) print_records(req, results_final_recIDs, jrec, rg, of, ot, ln, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, search_pattern=p, print_records_prologue_p=False, print_records_epilogue_p=False, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: # TODO: add a verbose message here for result in hosted_colls_true_results: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts: # TODO: add a verbose message here # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH) if hosted_colls_timeouts_results: for result in hosted_colls_timeouts_results: if result[1] == None or result[1] == False: ## these are the searches that returned no or zero results ## also print a nearest terms box, in case this is the only ## collection being searched and it returns no results?
if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) else: # these are the searches that actually returned results on time if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts_timeouts: for timeout in hosted_colls_timeouts_timeouts: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) print_records_epilogue(req, of) if f == "author" and of.startswith("h"): req.write(create_similarly_named_authors_link_box(p, ln)) # log query: try: id_query = log_query(req.remote_host, req.args, uid) if of.startswith("h") and id_query: if not of in ['hcs']: # display alert/RSS teaser for non-summary formats: user_info = collect_user_info(req) display_email_alert_part = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_email_alert_part = False else: if not user_info['precached_usealerts']: display_email_alert_part = False req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, \ ln=ln, display_email_alert_part=display_email_alert_part)) except: # do not log query if req is None (used by CLI interface) pass log_query_info("ss", p, f, colls_to_search, results_final_nb_total) # External searches if of.startswith("h"): if not of in ['hcs']: perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) def perform_request_cache(req, action="show"): """Manipulates the search engine cache.""" req.content_type = "text/html" req.send_http_header() req.write("") out = "" out += "

    # clear cache if requested:
    if action == "clear":
        search_results_cache.clear()
    req.write(out)
    # show collection reclist cache:
    out = "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.cache.keys():
        if collection_reclist_cache.cache[coll]:
            out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
    out += "</blockquote>"
    req.write(out)
    # show search results cache:
    out = "<h3>Search Cache</h3>"
    out += "- search cache usage: %d queries cached (max. ~%d)" % \
           (len(search_results_cache.cache), CFG_WEBSEARCH_SEARCH_CACHE_SIZE)
    if len(search_results_cache.cache):
        out += "<br />- search cache contents:"
        out += "<blockquote>"
        for query, hitset in search_results_cache.cache.items():
            out += "<br />%s ... %s" % (query, hitset)
        out += """</blockquote>
<p><a href="%s/search/cache?action=clear">clear search results cache</a>""" % CFG_SITE_URL
    out += "<p>"
    req.write(out)
    # show field i18nname cache:
    out = "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.cache.keys():
        for ln in field_i18nname_cache.cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
    out += "</blockquote>"
    req.write(out)
    # show collection i18nname cache:
    out = "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.cache.keys():
        for ln in collection_i18nname_cache.cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
    out += "</blockquote>"
    req.write(out)
    req.write("</html>")
    return "\n"

def perform_request_log(req, date=""):
    """Display search log information for given date."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big><p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                datetime, dummy_aas, p, f, c, nbhits = string.split(line,"#")
                i += 1
                req.write("<tr><td>#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"

def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True):
    """
    Analyze RECIDS and look for TAGS and return most popular values
    and the frequency with which they occur sorted according to
    descending frequency.

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)

    Example:
     >>> get_most_popular_field_values(range(11,20), '980__a')
     (('PREPRINT', 10), ('THESIS', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
     (('Ellis, J', 10), ('Ellis, N', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J'))
     (('Ellis, N', 7), ...)
    """
    def _get_most_popular_field_values_helper_sorter(val1, val2):
        "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically."
        compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1])
        if compared_via_frequencies == 0:
            return cmp(val1.lower(), val2.lower())
        else:
            return compared_via_frequencies

    valuefreqdict = {}
    ## sanity check:
    if not exclude_values:
        exclude_values = []
    if isinstance(tags, str):
        tags = (tags,)
    ## find values to count:
    vals_to_count = []
    displaytmp = {}
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                for val in get_fieldvalues(recid, tag, False):
                    vals_in_rec.append(val)
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                dtmp[val.lower()] = 1
                displaytmp[val.lower()] = val
            vals_in_rec = dtmp.keys()
            vals_to_count.extend(vals_in_rec)
    ## are we to exclude some of found values?
    for val in vals_to_count:
        if val not in exclude_values:
            if valuefreqdict.has_key(val):
                valuefreqdict[val] += 1
            else:
                valuefreqdict[val] = 1
    ## sort by descending frequency of values:
    out = ()
    vals = valuefreqdict.keys()
    vals.sort(_get_most_popular_field_values_helper_sorter)
    for val in vals:
        tmpdisplv = ''
        if displaytmp.has_key(val):
            tmpdisplv = displaytmp[val]
        else:
            tmpdisplv = val
        out += (tmpdisplv, valuefreqdict[val]),
    return out

def profile(p="", f="", c=CFG_SITE_NAME):
    """Profile search time."""
    import profile
    import pstats
    profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile")
    p = pstats.Stats("perform_request_search_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
    return 0
diff --git a/modules/websearch/lib/search_engine_utils.py b/modules/websearch/lib/search_engine_utils.py
new file mode 100644
index 000000000..177f4704f
--- /dev/null
+++ b/modules/websearch/lib/search_engine_utils.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+## This file is part of Invenio.
+## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+# pylint: disable=C0301
+
+"""Invenio search engine utilities."""
+
+from invenio.dbquery import run_sql
+
+def get_fieldvalues(recIDs, tag, repetitive_values=True):
+    """
+    Return list of field values for field TAG for the given record ID
+    or list of record IDs.  (RECIDS can be both an integer or a list
+    of integers.)
+
+    If REPETITIVE_VALUES is set to True, then return all values even
+    if they are doubled.  If set to False, then return unique values
+    only.
+    """
+    out = []
+    if isinstance(recIDs, (int, long)):
+        recIDs = [recIDs,]
+    if not isinstance(recIDs, (list, tuple)):
+        return []
+    if len(recIDs) == 0:
+        return []
+    if tag == "001___":
+        # We have asked for tag 001 (=recID) that is not stored in bibXXx
+        # tables.
+        out = [str(recID) for recID in recIDs]
+    else:
+        # we are going to look inside bibXXx tables
+        digits = tag[0:2]
+        try:
+            intdigits = int(digits)
+            if intdigits < 0 or intdigits > 99:
+                raise ValueError
+        except ValueError:
+            # invalid tag value asked for
+            return []
+        bx = "bib%sx" % digits
+        bibx = "bibrec_bib%sx" % digits
+        queryparam = []
+        for recID in recIDs:
+            queryparam.append(recID)
+        if not repetitive_values:
+            queryselect = "DISTINCT(bx.value)"
+        else:
+            queryselect = "bx.value"
+        query = "SELECT %s FROM %s AS bx, %s AS bibx " \
+                "WHERE bibx.id_bibrec IN (%s) AND bx.id=bibx.id_bibxxx AND " \
+                "bx.tag LIKE %%s " \
+                "ORDER BY bibx.field_number, bx.tag ASC" % \
+                (queryselect, bx, bibx, ("%s,"*len(queryparam))[:-1])
+        res = run_sql(query, tuple(queryparam) + (tag,))
+        for row in res:
+            out.append(row[0])
+    return out
diff --git a/modules/websearch/lib/websearch_regression_tests.py b/modules/websearch/lib/websearch_regression_tests.py
index 8144c9ce7..28f6e024f 100644
--- a/modules/websearch/lib/websearch_regression_tests.py
+++ b/modules/websearch/lib/websearch_regression_tests.py
@@ -1,1920 +1,1921 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
 ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
 ## published by the Free Software Foundation; either version 2 of the
 ## License, or (at your option) any later version.
 ##
 ## Invenio is distributed in the hope that it will be useful, but
 ## WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 ## General Public License for more details.
 ##
 ## You should have received a copy of the GNU General Public License
 ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
 ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
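# ----------------------------------------------------------------------
# Editorial aside (illustration only, not part of the patch): typical use
# of the relocated helper whose new import path is the point of this
# change.  The signature is taken from the new module above; the record
# IDs and MARC tags below are illustrative values, not test fixtures.
from invenio.search_engine_utils import get_fieldvalues

titles = get_fieldvalues(1, "245__a")             # one recID, all values
authors = get_fieldvalues([1, 2, 3], "700__a")    # list of recIDs, repetitions kept
uniq = get_fieldvalues([1, 2, 3], "700__a", repetitive_values=False)  # unique values only
# ----------------------------------------------------------------------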
# pylint: disable=C0301 # pylint: disable=E1102 """WebSearch module regression tests.""" __revision__ = "$Id$" import unittest import re import urlparse, cgi import sys if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from mechanize import Browser, LinkNotFoundError from invenio.config import CFG_SITE_URL, CFG_SITE_NAME, CFG_SITE_LANG, \ CFG_SITE_RECORD from invenio.testutils import make_test_suite, \ run_test_suite, \ make_url, make_surl, test_web_page_content, \ merge_error_messages from invenio.urlutils import same_urls_p from invenio.search_engine import perform_request_search, \ guess_primary_collection_of_a_record, guess_collection_of_a_record, \ collection_restricted_p, get_permitted_restricted_collections, \ - get_fieldvalues, search_pattern, search_unit, search_unit_in_bibrec, \ + search_pattern, search_unit, search_unit_in_bibrec, \ wash_colls +from invenio.search_engine_utils import get_fieldvalues def parse_url(url): parts = urlparse.urlparse(url) query = cgi.parse_qs(parts[4], True) return parts[2].split('/')[1:], query class WebSearchWebPagesAvailabilityTest(unittest.TestCase): """Check WebSearch web pages whether they are up or not.""" def test_search_interface_pages_availability(self): """websearch - availability of search interface pages""" baseurl = CFG_SITE_URL + '/' _exports = ['', 'collection/Poetry', 'collection/Poetry?as=1'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_search_results_pages_availability(self): """websearch - availability of search results pages""" baseurl = CFG_SITE_URL + '/search' _exports = ['', '?c=Poetry', '?p=ellis', '/cache', '/log'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_search_detailed_record_pages_availability(self): """websearch - availability of search detailed record pages""" baseurl = CFG_SITE_URL + '/'+ CFG_SITE_RECORD +'/' _exports = ['', '1', '1/', '1/files', '1/files/'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_browse_results_pages_availability(self): """websearch - availability of browse results pages""" baseurl = CFG_SITE_URL + '/search' _exports = ['?p=ellis&f=author&action_browse=Browse'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_help_page_availability(self): """websearch - availability of Help Central page""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help', expected_text="Help Central")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/?ln=fr', expected_text="Centre d'aide")) def test_search_tips_page_availability(self): """websearch - availability of Search Tips""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-tips', expected_text="Search Tips")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-tips?ln=fr', expected_text="Conseils de recherche")) def test_search_guide_page_availability(self): """websearch - availability of Search Guide""" self.assertEqual([], 
test_web_page_content(CFG_SITE_URL + '/help/search-guide', expected_text="Search Guide")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search-guide?ln=fr', expected_text="Guide de recherche")) class WebSearchTestLegacyURLs(unittest.TestCase): """ Check that the application still responds to legacy URLs for navigating, searching and browsing.""" def test_legacy_collections(self): """ websearch - collections handle legacy urls """ browser = Browser() def check(legacy, new, browser=browser): browser.open(legacy) got = browser.geturl() self.failUnless(same_urls_p(got, new), got) # Use the root URL unless we need more check(make_url('/', c=CFG_SITE_NAME), make_url('/', ln=CFG_SITE_LANG)) # Other collections are redirected in the /collection area check(make_url('/', c='Poetry'), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) # Drop unnecessary arguments, like ln and as (when they are # the default value) args = {'as': 0} check(make_url('/', c='Poetry', **args), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) # Otherwise, keep them args = {'as': 1, 'ln': CFG_SITE_LANG} check(make_url('/', c='Poetry', **args), make_url('/collection/Poetry', **args)) # Support the /index.py addressing too check(make_url('/index.py', c='Poetry'), make_url('/collection/Poetry', ln=CFG_SITE_LANG)) def test_legacy_search(self): """ websearch - search queries handle legacy urls """ browser = Browser() def check(legacy, new, browser=browser): browser.open(legacy) got = browser.geturl() self.failUnless(same_urls_p(got, new), got) # /search.py is redirected on /search # Note that `as' is a reserved word in Python 2.5 check(make_url('/search.py', p='nuclear', ln='en') + 'as=1', make_url('/search', p='nuclear', ln='en') + 'as=1') # direct recid searches are redirected to /CFG_SITE_RECORD check(make_url('/search.py', recid=1, ln='es'), make_url('/%s/1' % CFG_SITE_RECORD, ln='es')) def test_legacy_search_help_link(self): """websearch - legacy Search Help page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/index.en.html', expected_text="Help Central")) def test_legacy_search_tips_link(self): """websearch - legacy Search Tips page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/tips.fr.html', expected_text="Conseils de recherche")) def test_legacy_search_guide_link(self): """websearch - legacy Search Guide page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/search/guide.en.html', expected_text="Search Guide")) class WebSearchTestRecord(unittest.TestCase): """ Check the interface of the /CFG_SITE_RECORD results """ def test_format_links(self): """ websearch - check format links for records """ browser = Browser() # We open the record in all known HTML formats for hformat in ('hd', 'hx', 'hm'): browser.open(make_url('/%s/1' % CFG_SITE_RECORD, of=hformat)) if hformat == 'hd': # hd format should have a link to the following # formats for oformat in ('hx', 'hm', 'xm', 'xd'): target = make_url('/%s/1/export/%s?ln=en' % (CFG_SITE_RECORD, oformat)) try: browser.find_link(url=target) except LinkNotFoundError: self.fail('link %r should be in page' % target) else: # non-hd HTML formats should have a link back to # the main detailed record target = make_url('/%s/1' % CFG_SITE_RECORD) try: browser.find_link(url=target) except LinkNotFoundError: self.fail('link %r should be in page' % target) return def test_exported_formats(self): """ websearch - check formats exported through /CFG_SITE_RECORD/1/export/ URLs""" browser = 
Browser() self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hm' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hd' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/xm' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/xd' % CFG_SITE_RECORD), expected_text='ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hs' % CFG_SITE_RECORD), expected_text='ALEPH experiment' % \ (CFG_SITE_RECORD, CFG_SITE_LANG))) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/hx' % CFG_SITE_RECORD), expected_text='title = "ALEPH experiment')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/t?ot=245' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertNotEqual([], test_web_page_content(make_url('/%s/1/export/t?ot=245' % CFG_SITE_RECORD), expected_text='001__')) self.assertEqual([], test_web_page_content(make_url('/%s/1/export/h?ot=245' % CFG_SITE_RECORD), expected_text='245__ $$aALEPH experiment')) self.assertNotEqual([], test_web_page_content(make_url('/%s/1/export/h?ot=245' % CFG_SITE_RECORD), expected_text='001__')) return class WebSearchTestCollections(unittest.TestCase): def test_traversal_links(self): """ websearch - traverse all the publications of a collection """ browser = Browser() try: for aas in (0, 1): args = {'as': aas} browser.open(make_url('/collection/Preprints', **args)) for jrec in (11, 21, 11, 28): args = {'jrec': jrec, 'cc': 'Preprints'} if aas: args['as'] = aas url = make_url('/search', **args) try: browser.follow_link(url=url) except LinkNotFoundError: args['ln'] = CFG_SITE_LANG url = make_url('/search', **args) browser.follow_link(url=url) except LinkNotFoundError: self.fail('no link %r in %r' % (url, browser.geturl())) def test_collections_links(self): """ websearch - enter in collections and subcollections """ browser = Browser() def tryfollow(url): cur = browser.geturl() body = browser.response().read() try: browser.follow_link(url=url) except LinkNotFoundError: print body self.fail("in %r: could not find %r" % ( cur, url)) return for aas in (0, 1): if aas: kargs = {'as': 1} else: kargs = {} kargs['ln'] = CFG_SITE_LANG # We navigate from immediate son to immediate son... 
browser.open(make_url('/', **kargs)) tryfollow(make_url('/collection/Articles%20%26%20Preprints', **kargs)) tryfollow(make_url('/collection/Articles', **kargs)) # But we can also jump to a grandson immediately browser.back() browser.back() tryfollow(make_url('/collection/ALEPH', **kargs)) return def test_records_links(self): """ websearch - check the links toward records in leaf collections """ browser = Browser() browser.open(make_url('/collection/Preprints')) def harvest(): """ Parse all the links in the page, and check that for each link to a detailed record, we also have the corresponding link to the similar records.""" records = set() similar = set() for link in browser.links(): path, q = parse_url(link.url) if not path: continue if path[0] == CFG_SITE_RECORD: records.add(int(path[1])) continue if path[0] == 'search': if not q.get('rm') == ['wrd']: continue recid = q['p'][0].split(':')[1] similar.add(int(recid)) self.failUnlessEqual(records, similar) return records # We must have 10 links to the corresponding /CFG_SITE_RECORD found = harvest() self.failUnlessEqual(len(found), 10) # When clicking on the "Search" button, we must also have # these 10 links on the records. browser.select_form(name="search") browser.submit() found = harvest() self.failUnlessEqual(len(found), 10) return class WebSearchTestBrowse(unittest.TestCase): def test_browse_field(self): """ websearch - check that browsing works """ browser = Browser() browser.open(make_url('/')) browser.select_form(name='search') browser['f'] = ['title'] browser.submit(name='action_browse') def collect(): # We'll get a few links to search for the actual hits, plus a # link to the following results. res = [] for link in browser.links(url_regex=re.compile(CFG_SITE_URL + r'/search\?')): if link.text == 'Advanced Search': continue dummy, q = parse_url(link.url) res.append((link, q)) return res # if we follow the last link, we should get another # batch. There is an overlap of one item. 
batch_1 = collect() browser.follow_link(link=batch_1[-1][0]) batch_2 = collect() # FIXME: we cannot compare the whole query, as the collection # set is not equal self.failUnlessEqual(batch_1[-2][1]['p'], batch_2[0][1]['p']) class WebSearchTestOpenURL(unittest.TestCase): def test_isbn_01(self): """ websearch - isbn query via OpenURL 0.1""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl', isbn='0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) def test_isbn_10_rft_id(self): """ websearch - isbn query via OpenURL 1.0 - rft_id""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl', rft_id='urn:ISBN:0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) def test_isbn_10(self): """ websearch - isbn query via OpenURL 1.0""" browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/openurl?rft.isbn=0387940758')) dummy, current_q = parse_url(browser.geturl()) self.failUnlessEqual(current_q, { 'sc' : ['1'], 'p' : ['isbn:"0387940758"'], 'of' : ['hd'] }) class WebSearchTestSearch(unittest.TestCase): def test_hits_in_other_collection(self): """ websearch - check extension of a query to the home collection """ browser = Browser() # We do a precise search in an isolated collection browser.open(make_url('/collection/ISOLDE', ln='en')) browser.select_form(name='search') browser['f'] = ['author'] browser['p'] = 'matsubara' browser.submit() dummy, current_q = parse_url(browser.geturl()) link = browser.find_link(text_regex=re.compile('.*hit', re.I)) dummy, target_q = parse_url(link.url) # the target query should be the current query without any c # or cc specified. for f in ('cc', 'c', 'action_search'): if f in current_q: del current_q[f] self.failUnlessEqual(current_q, target_q) def test_nearest_terms(self): """ websearch - provide a list of nearest terms """ browser = Browser() browser.open(make_url('')) # Search something weird browser.select_form(name='search') browser['p'] = 'gronf' browser.submit() dummy, original = parse_url(browser.geturl()) for to_drop in ('cc', 'action_search', 'f'): if to_drop in original: del original[to_drop] if 'ln' not in original: original['ln'] = [CFG_SITE_LANG] # we should get a few searches back, which are identical # except for the p field being substituted (and the cc field # being dropped). 
if 'cc' in original: del original['cc'] for link in browser.links(url_regex=re.compile(CFG_SITE_URL + r'/search\?')): if link.text == 'Advanced Search': continue dummy, target = parse_url(link.url) if 'ln' not in target: target['ln'] = [CFG_SITE_LANG] original['p'] = [link.text] self.failUnlessEqual(original, target) return def test_switch_to_simple_search(self): """ websearch - switch to simple search """ browser = Browser() args = {'as': 1} browser.open(make_url('/collection/ISOLDE', **args)) browser.select_form(name='search') browser['p1'] = 'tandem' browser['f1'] = ['title'] browser.submit() browser.follow_link(text='Simple Search') dummy, q = parse_url(browser.geturl()) self.failUnlessEqual(q, {'cc': ['ISOLDE'], 'p': ['tandem'], 'f': ['title'], 'ln': ['en']}) def test_switch_to_advanced_search(self): """ websearch - switch to advanced search """ browser = Browser() browser.open(make_url('/collection/ISOLDE')) browser.select_form(name='search') browser['p'] = 'tandem' browser['f'] = ['title'] browser.submit() browser.follow_link(text='Advanced Search') dummy, q = parse_url(browser.geturl()) self.failUnlessEqual(q, {'cc': ['ISOLDE'], 'p1': ['tandem'], 'f1': ['title'], 'as': ['1'], 'ln' : ['en']}) def test_no_boolean_hits(self): """ websearch - check the 'no boolean hits' proposed links """ browser = Browser() browser.open(make_url('')) browser.select_form(name='search') browser['p'] = 'quasinormal muon' browser.submit() dummy, q = parse_url(browser.geturl()) for to_drop in ('cc', 'action_search', 'f'): if to_drop in q: del q[to_drop] for bsu in ('quasinormal', 'muon'): l = browser.find_link(text=bsu) q['p'] = bsu if not same_urls_p(l.url, make_url('/search', **q)): self.fail(repr((l.url, make_url('/search', **q)))) def test_similar_authors(self): """ websearch - test similar authors box """ browser = Browser() browser.open(make_url('')) browser.select_form(name='search') browser['p'] = 'Ellis, R K' browser['f'] = ['author'] browser.submit() l = browser.find_link(text="Ellis, R S") self.failUnless(same_urls_p(l.url, make_url('/search', p="Ellis, R S", f='author', ln='en'))) class WebSearchTestWildcardLimit(unittest.TestCase): """Checks if the wildcard limit is correctly passed and that users without authorization cannot exploit it""" def test_wildcard_limit_correctly_passed_when_not_set(self): """websearch - wildcard limit is correctly passed when default""" self.assertEqual(search_pattern(p='e*', f='author'), search_pattern(p='e*', f='author', wl=1000)) def test_wildcard_limit_correctly_passed_when_set(self): """websearch - wildcard limit is correctly passed when set""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=e*&f=author&of=id&wl=5', expected_text="[9, 10, 11, 17, 46, 48, 50, 51, 52, 53, 54, 67, 72, 74, 81, 88, 92, 96]")) def test_wildcard_limit_correctly_not_active(self): """websearch - wildcard limit is not active when there is no wildcard query""" self.assertEqual(search_pattern(p='ellis', f='author'), search_pattern(p='ellis', f='author', wl=1)) def test_wildcard_limit_increased_by_authorized_users(self): """websearch - wildcard limit increased by authorized user""" browser = Browser() #try a search query, with no wildcard limit set by the user browser.open(make_url('/search?p=a*&of=id')) recid_list_guest_no_limit = browser.response().read() # so the limit is CFG_WEBSEARCH_WILDCARD_LIMIT #try a search query, with a wildcard limit imposed by the user #wl=1000000 - a very high limit, higher than what CFG_WEBSEARCH_WILDCARD_LIMIT might be
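# Illustrative aside, not part of the original test suite: the behaviour the
# assertions below rely on can be sketched as a clamp on the requested
# wildcard limit. The helper name `effective_wildcard_limit` and its default
# cap are hypothetical; the real cap is CFG_WEBSEARCH_WILDCARD_LIMIT and is
# enforced inside the search engine itself.
def effective_wildcard_limit(requested_wl, user_is_authorized, site_cap=1000):
    """Sketch: guests may lower the wildcard limit but never raise it
    above the site-wide cap; authorized users may set any limit."""
    if user_is_authorized:
        return requested_wl
    return min(requested_wl, site_cap)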
browser.open(make_url('/search?p=a*&of=id&wl=1000000')) recid_list_guest_with_limit = browser.response().read() #same results should be returned for a search without the wildcard limit set by the user #and for a search with a large limit set by the user #in this way we know that no matter how large the limit is, the wildcard query will be #limited by CFG_WEBSEARCH_WILDCARD_LIMIT (for a guest user) self.failIf(len(recid_list_guest_no_limit.split(',')) != len(recid_list_guest_with_limit.split(','))) ##login as admin browser.open(make_surl('/youraccount/login')) browser.select_form(nr=0) browser['p_un'] = 'admin' browser['p_pw'] = '' browser.submit() #try a search query, with a wildcard limit imposed by an authorized user #wl = 10000 - a very high limit, higher than what CFG_WEBSEARCH_WILDCARD_LIMIT might be browser.open(make_surl('/search?p=a*&of=id&wl=10000')) recid_list_authuser_with_limit = browser.response().read() #the authorized user can set whatever limit he might wish #so, the results returned for the authorized user should be at least as many as those returned for the guest self.failUnless(len(recid_list_guest_no_limit.split(',')) <= len(recid_list_authuser_with_limit.split(','))) #logout browser.open(make_surl('/youraccount/logout')) browser.response().read() browser.close() class WebSearchNearestTermsTest(unittest.TestCase): """Check various alternatives of searches leading to the nearest terms box.""" def test_nearest_terms_box_in_okay_query(self): """ websearch - no nearest terms box for a successful query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text="jump to record")) def test_nearest_terms_box_in_unsuccessful_simple_query(self): """ websearch - nearest terms box for unsuccessful simple query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellisz', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=embed", expected_link_label='embed')) def test_nearest_terms_box_in_unsuccessful_simple_accented_query(self): """ websearch - nearest terms box for unsuccessful accented query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=elliszà', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=embed", expected_link_label='embed')) def test_nearest_terms_box_in_unsuccessful_structured_query(self): """ websearch - nearest terms box for unsuccessful structured query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellisz&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=fabbro&f=author", expected_link_label='fabbro')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3Aellisz', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=author%3Afabbro", expected_link_label='fabbro')) def test_nearest_terms_box_in_query_with_invalid_index(self): """ websearch - nearest terms box for queries with invalid indexes specified """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=bednarz%3Aellis', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=bednarz", expected_link_label='bednarz')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=1%3Aellis', expected_text="no index 1.", expected_link_target=CFG_SITE_URL+"/record/47?ln=en", expected_link_label="Detailed
record")) def test_nearest_terms_box_in_unsuccessful_phrase_query(self): """ websearch - nearest terms box for unsuccessful phrase query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis%2C+Z%22', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=author%3A%22Enqvist%2C+K%22", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%22ellisz%22&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%22Enqvist%2C+K%22&f=author", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%22elliszà%22&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%22Enqvist%2C+K%22&f=author", expected_link_label='Enqvist, K')) def test_nearest_terms_box_in_unsuccessful_partial_phrase_query(self): """ websearch - nearest terms box for unsuccessful partial phrase query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%27Ellis%2C+Z%27', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=author%3A%27Enqvist%2C+K%27", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%27ellisz%27&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%27Enqvist%2C+K%27&f=author", expected_link_label='Enqvist, K')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%27elliszà%27&f=author', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=%27Enqvist%2C+K%27&f=author", expected_link_label='Enqvist, K')) def test_nearest_terms_box_in_unsuccessful_partial_phrase_advanced_query(self): """ websearch - nearest terms box for unsuccessful partial phrase advanced search query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p1=aaa&f1=title&m1=p&as=1', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&f1=title&as=1&p1=A+simple+functional+form+for+proton-nucleus+total+reaction+cross+sections&m1=p", expected_link_label='A simple functional form for proton-nucleus total reaction cross sections')) def test_nearest_terms_box_in_unsuccessful_exact_phrase_advanced_query(self): """ websearch - nearest terms box for unsuccessful exact phrase advanced search query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p1=aaa&f1=title&m1=e&as=1', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&f1=title&as=1&p1=A+simple+functional+form+for+proton-nucleus+total+reaction+cross+sections&m1=e", expected_link_label='A simple functional form for proton-nucleus total reaction cross sections')) def test_nearest_terms_box_in_unsuccessful_boolean_query(self): """ websearch - nearest terms box for unsuccessful boolean query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3Aellisz+author%3Aellisz', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aellisz", expected_link_label='energi')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3Aenergi+author%3Aenergie', expected_text="Nearest terms in any collection are", 
expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aenqvist", expected_link_label='enqvist')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=title%3Aellisz+author%3Aellisz&f=keyword', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aellisz&f=keyword", expected_link_label='energi')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=title%3Aenergi+author%3Aenergie&f=keyword', expected_text="Nearest terms in any collection are", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=title%3Aenergi+author%3Aenqvist&f=keyword", expected_link_label='enqvist')) class WebSearchBooleanQueryTest(unittest.TestCase): """Check various boolean queries.""" def test_successful_boolean_query(self): """ websearch - successful boolean query """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis+muon', expected_text="records found", expected_link_label="Detailed record")) def test_unsuccessful_boolean_query_where_all_individual_terms_match(self): """ websearch - unsuccessful boolean query where all individual terms match """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis+muon+letter', expected_text="Boolean query returned no hits. Please combine your search terms differently.")) class WebSearchAuthorQueryTest(unittest.TestCase): """Check various author-related queries.""" def test_propose_similar_author_names_box(self): """ websearch - propose similar author names box """ self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=Ellis%2C+R&f=author', expected_text="See also: similar author names", expected_link_target=CFG_SITE_URL+"/search?ln=en&p=Ellis%2C+R+K&f=author", expected_link_label="Ellis, R K")) def test_do_not_propose_similar_author_names_box(self): """ websearch - do not propose similar author names box """ errmsgs = test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis%2C+R%22', expected_link_target=CFG_SITE_URL+"/search?ln=en&p=Ellis%2C+R+K&f=author", expected_link_label="Ellis, R K") if errmsgs[0].find("does not contain link to") > -1: pass else: self.fail("Should not propose similar author names box.") return class WebSearchSearchEnginePythonAPITest(unittest.TestCase): """Check typical search engine Python API calls on the demo data.""" def test_search_engine_python_api_for_failed_query(self): """websearch - search engine Python API for failed query""" self.assertEqual([], perform_request_search(p='aoeuidhtns')) def test_search_engine_python_api_for_successful_query(self): """websearch - search engine Python API for successful query""" self.assertEqual([8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 47], perform_request_search(p='ellis')) def test_search_engine_python_api_for_existing_record(self): """websearch - search engine Python API for existing record""" self.assertEqual([8], perform_request_search(recid=8)) def test_search_engine_python_api_for_nonexisting_record(self): """websearch - search engine Python API for non-existing record""" self.assertEqual([], perform_request_search(recid=1234567809)) def test_search_engine_python_api_for_nonexisting_collection(self): """websearch - search engine Python API for non-existing collection""" self.assertEqual([], perform_request_search(c='Foo')) def test_search_engine_python_api_for_range_of_records(self): """websearch - search engine Python API for range of records""" self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8, 9], 
perform_request_search(recid=1, recidb=10)) def test_search_engine_python_api_ranked_by_citation(self): """websearch - search engine Python API for citation ranking""" self.assertEqual([82, 83, 87, 89], perform_request_search(p='recid:81', rm='citation')) def test_search_engine_python_api_textmarc(self): """websearch - search engine Python API for Text MARC output""" # we are testing example from /help/hacking/search-engine-api import cStringIO tmp = cStringIO.StringIO() perform_request_search(req=tmp, p='higgs', of='tm', ot=['100', '700']) out = tmp.getvalue() tmp.close() self.assertEqual(out, """\ 000000085 100__ $$aGirardello, L$$uINFN$$uUniversita di Milano-Bicocca 000000085 700__ $$aPorrati, Massimo 000000085 700__ $$aZaffaroni, A 000000001 100__ $$aPhotolab """) class WebSearchSearchEngineWebAPITest(unittest.TestCase): """Check typical search engine Web API calls on the demo data.""" def test_search_engine_web_api_for_failed_query(self): """websearch - search engine Web API for failed query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=aoeuidhtns&of=id', expected_text="[]")) def test_search_engine_web_api_for_successful_query(self): """websearch - search engine Web API for successful query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis&of=id', expected_text="[8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 47]")) def test_search_engine_web_api_for_existing_record(self): """websearch - search engine Web API for existing record""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?recid=8&of=id', expected_text="[8]")) def test_search_engine_web_api_for_nonexisting_record(self): """websearch - search engine Web API for non-existing record""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?recid=123456789&of=id', expected_text="[]")) def test_search_engine_web_api_for_nonexisting_collection(self): """websearch - search engine Web API for non-existing collection""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?c=Foo&of=id', expected_text="[]")) def test_search_engine_web_api_for_range_of_records(self): """websearch - search engine Web API for range of records""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?recid=1&recidb=10&of=id', expected_text="[1, 2, 3, 4, 5, 6, 7, 8, 9]")) class WebSearchRestrictedCollectionTest(unittest.TestCase): """Test of the restricted Theses collection behaviour.""" def test_restricted_collection_interface_page(self): """websearch - restricted collection interface page body""" # there should be no Latest additions box for restricted collections self.assertNotEqual([], test_web_page_content(CFG_SITE_URL + '/collection/Theses', expected_text="Latest additions")) def test_restricted_search_as_anonymous_guest(self): """websearch - restricted collection not searchable by anonymous guest""" browser = Browser() browser.open(CFG_SITE_URL + '/search?c=Theses') response = browser.response().read() if response.find("If you think you have right to access it, please authenticate yourself.") > -1: pass else: self.fail("Oops, searching restricted collection without password should have redirected to login dialog.") return def test_restricted_search_as_authorized_person(self): """websearch - restricted collection searchable by authorized person""" browser = Browser() browser.open(CFG_SITE_URL + '/search?c=Theses') browser.select_form(nr=0) browser['p_un'] = 'jekyll' browser['p_pw'] = 'j123ekyll' browser.submit() if 
browser.response().read().find("records found") > -1: pass else: self.fail("Oops, Dr. Jekyll should be able to search Theses collection.") def test_restricted_search_as_unauthorized_person(self): """websearch - restricted collection not searchable by unauthorized person""" browser = Browser() browser.open(CFG_SITE_URL + '/search?c=Theses') browser.select_form(nr=0) browser['p_un'] = 'hyde' browser['p_pw'] = 'h123yde' browser.submit() # Mr. Hyde should not be able to connect: if browser.response().read().find("Authorization failure") <= -1: # if we got here, things are broken: self.fail("Oops, Mr. Hyde should not be able to search Theses collection.") def test_restricted_detailed_record_page_as_anonymous_guest(self): """websearch - restricted detailed record page not accessible to guests""" browser = Browser() browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD) if browser.response().read().find("You can use your nickname or your email address to login.") > -1: pass else: self.fail("Oops, searching restricted collection without password should have redirected to login dialog.") return def test_restricted_detailed_record_page_as_authorized_person(self): """websearch - restricted detailed record page accessible to authorized person""" browser = Browser() browser.open(CFG_SITE_URL + '/youraccount/login') browser.select_form(nr=0) browser['p_un'] = 'jekyll' browser['p_pw'] = 'j123ekyll' browser.submit() browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD) # Dr. Jekyll should be able to connect # (add the pw to the whole CFG_SITE_URL because we shall be # redirected to '/reordrestricted/'): if browser.response().read().find("A High-performance Video Browsing System") > -1: pass else: self.fail("Oops, Dr. Jekyll should be able to access restricted detailed record page.") def test_restricted_detailed_record_page_as_unauthorized_person(self): """websearch - restricted detailed record page not accessible to unauthorized person""" browser = Browser() browser.open(CFG_SITE_URL + '/youraccount/login') browser.select_form(nr=0) browser['p_un'] = 'hyde' browser['p_pw'] = 'h123yde' browser.submit() browser.open(CFG_SITE_URL + '/%s/35' % CFG_SITE_RECORD) # Mr. Hyde should not be able to connect: if browser.response().read().find('You are not authorized') <= -1: # if we got here, things are broken: self.fail("Oops, Mr. Hyde should not be able to access restricted detailed record page.") def test_collection_restricted_p(self): """websearch - collection_restricted_p""" self.failUnless(collection_restricted_p('Theses')) self.failIf(collection_restricted_p('Books & Reports')) def test_get_permitted_restricted_collections(self): """websearch - get_permitted_restricted_collections""" from invenio.webuser import get_uid_from_email, collect_user_info self.assertEqual(get_permitted_restricted_collections(collect_user_info(get_uid_from_email('jekyll@cds.cern.ch'))), ['Theses']) self.assertEqual(get_permitted_restricted_collections(collect_user_info(get_uid_from_email('hyde@cds.cern.ch'))), []) class WebSearchRestrictedPicturesTest(unittest.TestCase): """ Check whether restricted pictures on the demo site can be accessed well by people who have rights to access them. """ def test_restricted_pictures_guest(self): """websearch - restricted pictures not available to guest""" error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD, expected_text=['This file is restricted.
If you think you have right to access it, please authenticate yourself.']) if error_messages: self.fail(merge_error_messages(error_messages)) def test_restricted_pictures_romeo(self): """websearch - restricted pictures available to Romeo""" error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD, username='romeo', password='r123omeo', expected_text=[], unexpected_text=['This file is restricted', 'You are not authorized']) if error_messages: self.fail(merge_error_messages(error_messages)) def test_restricted_pictures_hyde(self): """websearch - restricted pictures not available to Mr. Hyde""" error_messages = test_web_page_content(CFG_SITE_URL + '/%s/1/files/0106015_01.jpg' % CFG_SITE_RECORD, username='hyde', password='h123yde', expected_text=['This file is restricted', 'You are not authorized']) if error_messages: self.failUnless("HTTP Error 401: Unauthorized" in merge_error_messages(error_messages)) class WebSearchRSSFeedServiceTest(unittest.TestCase): """Test of the RSS feed service.""" def test_rss_feed_service(self): """websearch - RSS feed service""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/rss', expected_text='<rss version="2.0"')) class WebSearchResultsOverview(unittest.TestCase): """Test of the search results page's results overview box and links.""" def test_results_overview_split_off(self): """websearch - results overview box when split by collection is off""" browser = Browser() browser.open(CFG_SITE_URL + '/search?p=of&sc=0') body = browser.response().read() if body.find("Results overview") > -1: self.fail("Oops, when split by collection is off, " "results overview should not be present.") if body.find('<a name="1"></a>') == -1: self.fail("Oops, when split by collection is off, " "Atlantis collection should be found.") if body.find('<a name="15"></a>') > -1: self.fail("Oops, when split by collection is off, " "Multimedia & Arts should not be found.") try: browser.find_link(url='#15') self.fail("Oops, when split by collection is off, " "a link to Multimedia & Arts should not be found.") except LinkNotFoundError: pass def test_results_overview_split_on(self): """websearch - results overview box when split by collection is on""" browser = Browser() browser.open(CFG_SITE_URL + '/search?p=of&sc=1') body = browser.response().read() if body.find("Results overview") == -1: self.fail("Oops, when split by collection is on, " "results overview should be present.") if body.find('<a name="1"></a>') > -1: self.fail("Oops, when split by collection is on, " "Atlantis collection should not be found.") if body.find('<a name="15"></a>') == -1: self.fail("Oops, when split by collection is on, " "Multimedia & Arts should be found.") try: browser.find_link(url='#15') except LinkNotFoundError: self.fail("Oops, when split by collection is on, " "a link to Multimedia & Arts should be found.") class WebSearchSortResultsTest(unittest.TestCase): """Test of the search results page's sorting capability.""" def test_sort_results_default(self): """websearch - search results sorting, default method""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1', expected_text="[TESLA-FEL-99-07]")) def test_sort_results_ascending(self): """websearch - search results sorting, ascending field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=a', expected_text="ISOLTRAP")) def test_sort_results_descending(self): """websearch - search results sorting, descending field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=d', expected_text=" [TESLA-FEL-99-07]")) def test_sort_results_sort_pattern(self): """websearch - search results sorting, preferential sort pattern""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=of&f=title&rg=1&sf=reportnumber&so=d&sp=cern', expected_text="[CERN-TH-2002-069]")) class WebSearchSearchResultsXML(unittest.TestCase):
"""Test search results in various output""" def test_search_results_xm_output_split_on(self): """ websearch - check document element of search results in xm output (split by collection on)""" browser = Browser() browser.open(CFG_SITE_URL + '/search?sc=1&of=xm') body = browser.response().read() num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") def test_search_results_xm_output_split_off(self): """ websearch - check document element of search results in xm output (split by collection off)""" browser = Browser() browser.open(CFG_SITE_URL + '/search?sc=0&of=xm') body = browser.response().read() num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") def test_search_results_xd_output_split_on(self): """ websearch - check document element of search results in xd output (split by collection on)""" browser = Browser() browser.open(CFG_SITE_URL + '/search?sc=1&of=xd') body = browser.response().read() num_doc_element = body.count("" "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") def test_search_results_xd_output_split_off(self): """ websearch - check document element of search results in xd output (split by collection off)""" browser = Browser() browser.open(CFG_SITE_URL + '/search?sc=0&of=xd') body = browser.response().read() num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") num_doc_element = body.count("") if num_doc_element == 0: self.fail("Oops, no document element " "found in search results.") elif num_doc_element > 1: self.fail("Oops, multiple document elements " "found in search results.") class WebSearchUnicodeQueryTest(unittest.TestCase): """Test of the search results for queries containing Unicode characters.""" def test_unicode_word_query(self): """websearch - Unicode word query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%CE%99%CE%B8%CE%AC%CE%BA%CE%B7', expected_text="[76]")) def test_unicode_word_query_not_found_term(self): """websearch - Unicode word query, not found term""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3A%CE%99%CE%B8', expected_text="ιθάκη")) def test_unicode_exact_phrase_query(self): """websearch - Unicode exact phrase query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%22%CE%99%CE%B8%CE%AC%CE%BA%CE%B7%22', 
expected_text="[76]")) def test_unicode_partial_phrase_query(self): """websearch - Unicode partial phrase query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%27%CE%B7%27', expected_text="[76]")) def test_unicode_regexp_query(self): """websearch - Unicode regexp query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=title%3A%2F%CE%B7%2F', expected_text="[76]")) class WebSearchMARCQueryTest(unittest.TestCase): """Test of the search results for queries containing physical MARC tags.""" def test_single_marc_tag_exact_phrase_query(self): """websearch - single MARC tag, exact phrase query (100__a)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=100__a%3A%22Ellis%2C+J%22', expected_text="[9, 14, 18]")) def test_single_marc_tag_partial_phrase_query(self): """websearch - single MARC tag, partial phrase query (245__b)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245__b%3A%27and%27', expected_text="[28]")) def test_many_marc_tags_partial_phrase_query(self): """websearch - many MARC tags, partial phrase query (245)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245%3A%27and%27', expected_text="[1, 8, 9, 14, 15, 20, 22, 24, 28, 33, 47, 48, 49, 51, 53, 64, 69, 71, 79, 82, 83, 85, 91, 96]")) def test_single_marc_tag_regexp_query(self): """websearch - single MARC tag, regexp query""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=245%3A%2Fand%2F', expected_text="[1, 8, 9, 14, 15, 20, 22, 24, 28, 33, 47, 48, 49, 51, 53, 64, 69, 71, 79, 82, 83, 85, 91, 96]")) class WebSearchExtSysnoQueryTest(unittest.TestCase): """Test of queries using external system numbers.""" def test_existing_sysno_html_output(self): """websearch - external sysno query, existing sysno, HTML output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CER', expected_text="The wall of the cave")) def test_existing_sysno_id_output(self): """websearch - external sysno query, existing sysno, ID output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CER&of=id', expected_text="[95]")) def test_nonexisting_sysno_html_output(self): """websearch - external sysno query, non-existing sysno, HTML output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CERRRR', expected_text="Requested record does not seem to exist.")) def test_nonexisting_sysno_id_output(self): """websearch - external sysno query, non-existing sysno, ID output""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?sysno=000289446CERRRR&of=id', expected_text="[]")) class WebSearchResultsRecordGroupingTest(unittest.TestCase): """Test search results page record grouping (rg).""" def test_search_results_rg_guest(self): """websearch - search results, records in groups of, guest""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?rg=17', expected_text="1 - 17")) def test_search_results_rg_nonguest(self): """websearch - search results, records in groups of, non-guest""" # This test used to fail due to saved user preference fetching # not overridden by URL rg argument. 
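# Illustrative aside, not part of the original test suite: the precedence this
# test pins down can be sketched as follows; `effective_rg` is a hypothetical
# helper name, but CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS is the real site default.
def effective_rg(url_rg, saved_preference_rg, default_rg):
    """Sketch: an explicit rg URL argument wins over the user's saved
    preference, which in turn wins over the site default."""
    return url_rg or saved_preference_rg or default_rg
# e.g. effective_rg(17, 25, 10) == 17 -- the URL argument must win,
# which is exactly what the admin-user assertion below checks.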
self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?rg=17', username='admin', expected_text="1 - 17")) class WebSearchSpecialTermsQueryTest(unittest.TestCase): """Test of the search results for queries containing special terms.""" def test_special_terms_u1(self): """websearch - query for special terms, U(1)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29', expected_text="[57, 79, 80, 88]")) def test_special_terms_u1_and_sl(self): """websearch - query for special terms, U(1) SL(2,Z)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29+SL%282%2CZ%29', expected_text="[88]")) def test_special_terms_u1_and_sl_or(self): """websearch - query for special terms, U(1) OR SL(2,Z)""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=U%281%29+OR+SL%282%2CZ%29', expected_text="[57, 79, 80, 88]")) def test_special_terms_u1_and_sl_or_parens(self): """websearch - query for special terms, (U(1) OR SL(2,Z))""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=%28U%281%29+OR+SL%282%2CZ%29%29', expected_text="[57, 79, 80, 88]")) class WebSearchJournalQueryTest(unittest.TestCase): """Test of the search results for journal pubinfo queries.""" def test_query_journal_title_only(self): """websearch - journal publication info query, title only""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&f=journal&p=Phys.+Lett.+B', expected_text="[77, 78, 85, 87]")) def test_query_journal_full_pubinfo(self): """websearch - journal publication info query, full reference""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&f=journal&p=Phys.+Lett.+B+531+%282002%29+301', expected_text="[78]")) class WebSearchStemmedIndexQueryTest(unittest.TestCase): """Test of the search results for queries using stemmed indexes.""" def test_query_stemmed_lowercase(self): """websearch - stemmed index query, lowercase""" # note that dasse/Dasse is stemmed into dass/Dass, as expected self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=dasse', expected_text="[25, 26]")) def test_query_stemmed_uppercase(self): """websearch - stemmed index query, uppercase""" # ... but note also that DASSE is stemmed into DASSE(!); so # the test would fail if the search engine did not lowercase the # query term. (Something that is not necessary for # non-stemmed indexes.)
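# Illustrative aside, assuming a Porter-style stemmer (the actual stemmer
# depends on the index configuration): lowercasing must happen before
# stemming, because uppercase input passes through many stemmers unchanged:
#     stem('dasse')  ->  'dass'    # matches the indexed, lowercased form
#     stem('DASSE')  ->  'DASSE'   # unstemmed, would match nothing
# so on stemmed indexes the engine effectively queries stem(term.lower()).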
self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?of=id&p=DASSE', expected_text="[25, 26]")) class WebSearchSummarizerTest(unittest.TestCase): """Test of the search results summarizer functions.""" def test_most_popular_field_values_singletag(self): """websearch - most popular field values, simple tag""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('PREPRINT', 37), ('ARTICLE', 28), ('BOOK', 14), ('THESIS', 8), ('PICTURE', 7), ('POETRY', 2), ('REPORT', 2), ('ATLANTISTIMESNEWS', 1)), get_most_popular_field_values(range(0,100), '980__a')) def test_most_popular_field_values_singletag_multiexclusion(self): """websearch - most popular field values, simple tag, multiple exclusions""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('PREPRINT', 37), ('ARTICLE', 28), ('BOOK', 14), ('REPORT', 2), ('ATLANTISTIMESNEWS', 1)), get_most_popular_field_values(range(0,100), '980__a', ('THESIS', 'PICTURE', 'POETRY'))) def test_most_popular_field_values_multitag(self): """websearch - most popular field values, multiple tags""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('Ellis, J', 3), ('Enqvist, K', 1), ('Ibanez, L E', 1), ('Nanopoulos, D V', 1), ('Ross, G G', 1)), get_most_popular_field_values((9, 14, 18), ('100__a', '700__a'))) def test_most_popular_field_values_multitag_singleexclusion(self): """websearch - most popular field values, multiple tags, single exclusion""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('Enqvist, K', 1), ('Ibanez, L E', 1), ('Nanopoulos, D V', 1), ('Ross, G G', 1)), get_most_popular_field_values((9, 14, 18), ('100__a', '700__a'), ('Ellis, J',))) def test_most_popular_field_values_multitag_countrepetitive(self): """websearch - most popular field values, multiple tags, counting repetitive occurrences""" from invenio.search_engine import get_most_popular_field_values self.assertEqual((('THESIS', 2), ('REPORT', 1)), get_most_popular_field_values((41,), ('690C_a', '980__a'), count_repetitive_values=True)) self.assertEqual((('REPORT', 1), ('THESIS', 1)), get_most_popular_field_values((41,), ('690C_a', '980__a'), count_repetitive_values=False)) def test_ellis_citation_summary(self): """websearch - query ellis, citation summary output format""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+"/search?p=ellis%20AND%20cited%3A1-%3E9&rm=citation", expected_link_label='1')) def test_ellis_not_quark_citation_summary_advanced(self): """websearch - ellis and not quark, citation summary format advanced""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&as=1&m1=a&p1=ellis&f1=author&op1=n&m2=a&p2=quark&f2=&op2=a&m3=a&p3=&f3=&action_search=Search&sf=&so=a&rm=&rg=10&sc=1&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+'/search?p=author%3Aellis%20and%20not%20quark%20AND%20cited%3A1-%3E9&rm=citation', expected_link_label='1')) def test_ellis_not_quark_citation_summary_regular(self): """websearch - ellis and not quark, citation summary format regular""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?ln=en&p=author%3Aellis+and+not+quark&f=&action_search=Search&sf=&so=d&rm=&rg=10&sc=0&of=hcs', expected_text="Less known papers (1-9)", expected_link_target=CFG_SITE_URL+'/search?p=author%3Aellis%20and%20not%20quark%20AND%20cited%3A1-%3E9&rm=citation',
expected_link_label='1')) class WebSearchRecordCollectionGuessTest(unittest.TestCase): """Primary collection guessing tests.""" def test_guess_primary_collection_of_a_record(self): """websearch - guess_primary_collection_of_a_record""" self.assertEqual(guess_primary_collection_of_a_record(96), 'Articles') def test_guess_collection_of_a_record(self): """websearch - guess_collection_of_a_record""" self.assertEqual(guess_collection_of_a_record(96), 'Articles') self.assertEqual(guess_collection_of_a_record(96, '%s/collection/Theoretical Physics (TH)?ln=en' % CFG_SITE_URL), 'Articles') self.assertEqual(guess_collection_of_a_record(12, '%s/collection/Theoretical Physics (TH)?ln=en' % CFG_SITE_URL), 'Theoretical Physics (TH)') self.assertEqual(guess_collection_of_a_record(12, '%s/collection/Theoretical%%20Physics%%20%%28TH%%29?ln=en' % CFG_SITE_URL), 'Theoretical Physics (TH)') class WebSearchGetFieldValuesTest(unittest.TestCase): """Testing get_fieldvalues() function.""" def test_get_fieldvalues_001(self): """websearch - get_fieldvalues() for bibxxx-agnostic tags""" self.assertEqual(get_fieldvalues(10, '001___'), ['10']) def test_get_fieldvalues_980(self): """websearch - get_fieldvalues() for bibxxx-powered tags""" self.assertEqual(get_fieldvalues(18, '700__a'), ['Enqvist, K', 'Nanopoulos, D V']) self.assertEqual(get_fieldvalues(18, '909C1u'), ['CERN']) def test_get_fieldvalues_wildcard(self): """websearch - get_fieldvalues() for tag wildcards""" self.assertEqual(get_fieldvalues(18, '%'), []) self.assertEqual(get_fieldvalues(18, '7%'), []) self.assertEqual(get_fieldvalues(18, '700%'), ['Enqvist, K', 'Nanopoulos, D V']) self.assertEqual(get_fieldvalues(18, '909C0%'), ['1985', '13', 'TH']) def test_get_fieldvalues_recIDs(self): """websearch - get_fieldvalues() for list of recIDs""" self.assertEqual(get_fieldvalues([], '001___'), []) self.assertEqual(get_fieldvalues([], '700__a'), []) self.assertEqual(get_fieldvalues([10, 13], '001___'), ['10', '13']) self.assertEqual(get_fieldvalues([18, 13], '700__a'), ['Dawson, S', 'Ellis, R K', 'Enqvist, K', 'Nanopoulos, D V']) def test_get_fieldvalues_repetitive(self): """websearch - get_fieldvalues() for repetitive values""" self.assertEqual(get_fieldvalues([17, 18], '909C1u'), ['CERN', 'CERN']) self.assertEqual(get_fieldvalues([17, 18], '909C1u', repetitive_values=True), ['CERN', 'CERN']) self.assertEqual(get_fieldvalues([17, 18], '909C1u', repetitive_values=False), ['CERN']) class WebSearchAddToBasketTest(unittest.TestCase): """Test of the add-to-basket presence depending on user rights.""" def test_add_to_basket_guest(self): """websearch - add-to-basket facility allowed for guests""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='Add to basket')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='<input name="recid" type="checkbox" value="10" />')) def test_add_to_basket_jekyll(self): """websearch - add-to-basket facility allowed for Dr. Jekyll""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='Add to basket', username='jekyll', password='j123ekyll')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', expected_text='<input name="recid" type="checkbox" value="10" />', username='jekyll', password='j123ekyll')) def test_add_to_basket_hyde(self): """websearch - add-to-basket facility denied to Mr.
Hyde""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', unexpected_text='Add to basket', username='hyde', password='h123yde')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=recid%3A10', unexpected_text='<input name="recid" type="checkbox" value="10" />', username='hyde', password='h123yde')) class WebSearchAlertTeaserTest(unittest.TestCase): """Test of the alert teaser presence depending on user rights.""" def test_alert_teaser_guest(self): """websearch - alert teaser allowed for guests""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_link_label='email alert')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed')) def test_alert_teaser_jekyll(self): """websearch - alert teaser allowed for Dr. Jekyll""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='email alert', username='jekyll', password='j123ekyll')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed', username='jekyll', password='j123ekyll')) def test_alert_teaser_hyde(self): """websearch - alert teaser allowed for Mr. Hyde""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='email alert', username='hyde', password='h123yde')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=ellis', expected_text='RSS feed', username='hyde', password='h123yde')) class WebSearchSpanQueryTest(unittest.TestCase): """Test of span queries.""" def test_span_in_word_index(self): """websearch - span query in a word index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=year%3A1992-%3E1996&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_in_phrase_index(self): """websearch - span query in a phrase index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=year%3A%221992%22-%3E%221996%22&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_in_bibxxx(self): """websearch - span query in MARC tables""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=909C0y%3A%221992%22-%3E%221996%22&of=id&ap=0', expected_text='[17, 66, 69, 71]')) def test_span_with_spaces(self): """websearch - no span query when a space is around""" # useful for reaction search self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=title%3A%27mu%20--%3E%20e%27&of=id&ap=0', expected_text='[67]')) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=245%3A%27mu%20--%3E%20e%27&of=id&ap=0', expected_text='[67]')) def test_span_in_author(self): """websearch - span query in special author index""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=author%3A%22Ellis,%20K%22-%3E%22Ellis,%20RZ%22&of=id&ap=0', expected_text='[8, 11, 13, 17, 47]')) class WebSearchReferstoCitedbyTest(unittest.TestCase): """Test of refersto/citedby search operators.""" def test_refersto_recid(self): 'websearch - refersto:recid:84' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Arecid%3A84&of=id&ap=0', expected_text='[85, 88, 91]')) def test_refersto_repno(self): 'websearch - refersto:reportnumber:hep-th/0205061' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Areportnumber%3Ahep-th/0205061&of=id&ap=0', expected_text='[91]')) def test_refersto_author_word(self): 'websearch - refersto:author:klebanov' self.assertEqual([], test_web_page_content(CFG_SITE_URL +
'/search?p=refersto%3Aauthor%3Aklebanov&of=id&ap=0', expected_text='[85, 86, 88, 91]')) def test_refersto_author_phrase(self): 'websearch - refersto:author:"Klebanov, I"' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Aauthor%3A%22Klebanov,%20I%22&of=id&ap=0', expected_text='[85, 86, 88, 91]')) def test_citedby_recid(self): 'websearch - citedby:recid:92' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Arecid%3A92&of=id&ap=0', expected_text='[74, 91]')) def test_citedby_repno(self): 'websearch - citedby:reportnumber:hep-th/0205061' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Areportnumber%3Ahep-th/0205061&of=id&ap=0', expected_text='[78]')) def test_citedby_author_word(self): 'websearch - citedby:author:klebanov' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Aauthor%3Aklebanov&of=id&ap=0', expected_text='[95]')) def test_citedby_author_phrase(self): 'websearch - citedby:author:"Klebanov, I"' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Aauthor%3A%22Klebanov,%20I%22&of=id&ap=0', expected_text='[95]')) def test_refersto_bad_query(self): 'websearch - refersto:title:' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=refersto%3Atitle%3A', expected_text='There are no records referring to title:.')) def test_citedby_bad_query(self): 'websearch - citedby:title:' self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=citedby%3Atitle%3A', expected_text='There are no records cited by title:.')) class WebSearchSPIRESSyntaxTest(unittest.TestCase): """Test of SPIRES syntax issues""" def test_and_not_parens(self): 'websearch - find a ellis, j and not a enqvist' self.assertEqual([], test_web_page_content(CFG_SITE_URL +'/search?p=find+a+ellis%2C+j+and+not+a+enqvist&of=id&ap=0', expected_text='[9, 12, 14, 47]')) def test_dadd_search(self): 'websearch - find da > today - 3650' # XXX: assumes we've reinstalled our site in the last 10 years # should return every document in the system self.assertEqual([], test_web_page_content(CFG_SITE_URL +'/search?ln=en&p=find+da+%3E+today+-+3650&f=&of=id', expected_text='[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104]')) class WebSearchDateQueryTest(unittest.TestCase): """Test various date queries.""" def setUp(self): """Establish variables we plan to re-use""" from invenio.intbitset import intbitset as HitSet self.empty = HitSet() def test_search_unit_hits_for_datecreated_previous_millenia(self): """websearch - search_unit with datecreated returns >0 hits for docs in the last 1000 years""" self.assertNotEqual(self.empty, search_unit('1000-01-01->9999', 'datecreated')) def test_search_unit_hits_for_datemodified_previous_millenia(self): """websearch - search_unit with datemodified returns >0 hits for docs in the last 1000 years""" self.assertNotEqual(self.empty, search_unit('1000-01-01->9999', 'datemodified')) def test_search_unit_in_bibrec_for_datecreated_previous_millenia(self): """websearch - search_unit_in_bibrec with creationdate gets >0 hits for past 1000 years""" self.assertNotEqual(self.empty, search_unit_in_bibrec("1000-01-01", "9999-12-31", 
'creationdate')) def test_search_unit_in_bibrec_for_datecreated_next_millenia(self): """websearch - search_unit_in_bibrec with creationdate gets 0 hits for after year 3000""" self.assertEqual(self.empty, search_unit_in_bibrec("3000-01-01", "9999-12-31", 'creationdate')) class WebSearchSynonymQueryTest(unittest.TestCase): """Test of queries using synonyms.""" def test_journal_phrvd(self): """websearch - search-time synonym search, journal title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=PHRVD&f=journal&of=id', expected_text="[66, 72]")) def test_journal_phrvd_54_1996_4234(self): """websearch - search-time synonym search, journal article""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=PHRVD%2054%20%281996%29%204234&f=journal&of=id', expected_text="[66]")) def test_journal_beta_decay_title(self): """websearch - index-time synonym search, beta decay in title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta+decay&f=title&of=id', expected_text="[59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2+decay&f=title&of=id', expected_text="[59]")) def test_journal_beta_decay_global(self): """websearch - index-time synonym search, beta decay in any field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta+decay&of=id', expected_text="[52, 59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2+decay&of=id', expected_text="[52, 59]")) def test_journal_beta_title(self): """websearch - index-time synonym search, beta in title""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta&f=title&of=id', expected_text="[59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2&f=title&of=id', expected_text="[59]")) def test_journal_beta_global(self): """websearch - index-time synonym search, beta in any field""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=beta&of=id', expected_text="[52, 59]")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/search?p=%CE%B2&of=id', expected_text="[52, 59]")) class WebSearchWashCollectionsTest(unittest.TestCase): """Test if the collection argument is washed correctly""" def test_wash_coll_when_coll_restricted(self): """websearch - washing of restricted daughter collections""" self.assertEqual( sorted(wash_colls(cc='', c=['Books & Reports', 'Theses'])[1]), ['Books & Reports', 'Theses']) self.assertEqual( sorted(wash_colls(cc='', c=['Books & Reports', 'Theses'])[2]), ['Books & Reports', 'Theses']) TEST_SUITE = make_test_suite(WebSearchWebPagesAvailabilityTest, WebSearchTestSearch, WebSearchTestBrowse, WebSearchTestOpenURL, WebSearchTestCollections, WebSearchTestRecord, WebSearchTestLegacyURLs, WebSearchNearestTermsTest, WebSearchBooleanQueryTest, WebSearchAuthorQueryTest, WebSearchSearchEnginePythonAPITest, WebSearchSearchEngineWebAPITest, WebSearchRestrictedCollectionTest, WebSearchRestrictedPicturesTest, WebSearchRSSFeedServiceTest, WebSearchXSSVulnerabilityTest, WebSearchResultsOverview, WebSearchSortResultsTest, WebSearchSearchResultsXML, WebSearchUnicodeQueryTest, WebSearchMARCQueryTest, WebSearchExtSysnoQueryTest, WebSearchResultsRecordGroupingTest, WebSearchSpecialTermsQueryTest, WebSearchJournalQueryTest, WebSearchStemmedIndexQueryTest, WebSearchSummarizerTest, WebSearchRecordCollectionGuessTest, WebSearchGetFieldValuesTest, WebSearchAddToBasketTest, WebSearchAlertTeaserTest, WebSearchSpanQueryTest, WebSearchReferstoCitedbyTest, 
WebSearchSPIRESSyntaxTest, WebSearchDateQueryTest, WebSearchTestWildcardLimit, WebSearchSynonymQueryTest, WebSearchWashCollectionsTest) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py index b68351552..23caf3107 100644 --- a/modules/websearch/lib/websearch_templates.py +++ b/modules/websearch/lib/websearch_templates.py @@ -1,4432 +1,4413 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 __revision__ = "$Id$" import time import cgi import string import re import locale from urllib import quote, urlencode from xml.sax.saxutils import escape as xml_escape from invenio.config import \ CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_SPLIT_BY_COLLECTION, \ CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ CFG_BIBRANK_SHOW_READING_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_BIBRANK_SHOW_CITATION_STATS, \ CFG_BIBRANK_SHOW_CITATION_GRAPHS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_VERSION, \ CFG_SITE_URL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_ADMIN_EMAIL, \ CFG_CERN_SITE, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_WEBCOMMENT_ALLOW_COMMENTS, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_WEBSEARCH_SHOW_COMMENT_COUNT, \ CFG_WEBSEARCH_SHOW_REVIEW_COUNT, \ CFG_SITE_RECORD from invenio.dbquery import run_sql from invenio.messages import gettext_set_language from invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url from invenio.htmlutils import nmtoken_from_string from invenio.webinterface_handler import wash_urlargd from invenio.bibrank_citation_searcher import get_cited_by_count from invenio.intbitset import intbitset from invenio.websearch_external_collections import external_collection_get_state, get_external_collection_engine from invenio.websearch_external_collections_utils import get_collection_id from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS +from invenio.search_engine_utils import get_fieldvalues _RE_PUNCTUATION = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION) _RE_SPACES = re.compile(r"\s+") -def get_fieldvalues(recID, tag): - """Return list of field values for field TAG inside record RECID. 
- FIXME: should be imported commonly for search_engine too.""" - out = [] - if tag == "001___": - # we have asked for recID that is not stored in bibXXx tables - out.append(str(recID)) - else: - # we are going to look inside bibXXx tables - digit = tag[0:2] - bx = "bib%sx" % digit - bibx = "bibrec_bib%sx" % digit - query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \ - "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag) - res = run_sql(query) - for row in res: - out.append(row[0]) - return out - - class Template: # This dictionary maps Invenio language code to locale codes (ISO 639) tmpl_localemap = { 'bg': 'bg_BG', 'ar': 'ar_AR', 'ca': 'ca_ES', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', 'pt': 'pt_BR', 'fr': 'fr_FR', 'it': 'it_IT', 'ka': 'ka_GE', 'lt': 'lt_LT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'rw': 'rw_RW', 'sk': 'sk_SK', 'cs': 'cs_CZ', 'no': 'no_NO', 'sv': 'sv_SE', 'uk': 'uk_UA', 'ja': 'ja_JA', 'pl': 'pl_PL', 'hr': 'hr_HR', 'zh_CN': 'zh_CN', 'zh_TW': 'zh_TW', 'hu': 'hu_HU', 'af': 'af_ZA', 'gl': 'gl_ES' } tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure # Type of the allowed parameters for the web interface for search results search_results_default_urlargd = { 'cc': (str, CFG_SITE_NAME), 'c': (list, []), 'p': (str, ""), 'f': (str, ""), 'rg': (int, CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS), 'sf': (str, ""), 'so': (str, "d"), 'sp': (str, ""), 'rm': (str, ""), 'of': (str, "hb"), 'ot': (list, []), 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""), 'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""), 'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""), 'sc': (int, 0), 'jrec': (int, 0), 'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""), 'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""), 'action': (str, "search"), 'action_search': (str, ""), 'action_browse': (str, ""), 'd1': (str, ""), 'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0), 'd2': (str, ""), 'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0), 'dt': (str, ""), 'ap': (int, 1), 'verbose': (int, 0), 'ec': (list, []), 'wl': (int, CFG_WEBSEARCH_WILDCARD_LIMIT), } # ...and for search interfaces search_interface_default_urlargd = { 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0)} # ...and for RSS feeds rss_default_urlargd = {'c' : (list, []), 'cc' : (str, ""), 'p' : (str, ""), 'f' : (str, ""), 'p1' : (str, ""), 'f1' : (str, ""), 'm1' : (str, ""), 'op1': (str, ""), 'p2' : (str, ""), 'f2' : (str, ""), 'm2' : (str, ""), 'op2': (str, ""), 'p3' : (str, ""), 'f3' : (str, ""), 'm3' : (str, "")} tmpl_openurl_accepted_args = { 'id' : (list, []), 'genre' : (str, ''), 'aulast' : (str, ''), 'aufirst' : (str, ''), 'auinit' : (str, ''), 'auinit1' : (str, ''), 'auinitm' : (str, ''), 'issn' : (str, ''), 'eissn' : (str, ''), 'coden' : (str, ''), 'isbn' : (str, ''), 'sici' : (str, ''), 'bici' : (str, ''), 'title' : (str, ''), 'stitle' : (str, ''), 'atitle' : (str, ''), 'volume' : (str, ''), 'part' : (str, ''), 'issue' : (str, ''), 'spage' : (str, ''), 'epage' : (str, ''), 'pages' : (str, ''), 'artnum' : (str, ''), 'date' : (str, ''), 'ssn' : (str, ''), 'quarter' : (str, ''), 'url_ver' : (str, ''), 'ctx_ver' : (str, ''), 'rft_val_fmt' : (str, ''), 'rft_id' : (list, []), 'rft.atitle' : (str, ''), 
'rft.title' : (str, ''), 'rft.jtitle' : (str, ''), 'rft.stitle' : (str, ''), 'rft.date' : (str, ''), 'rft.volume' : (str, ''), 'rft.issue' : (str, ''), 'rft.spage' : (str, ''), 'rft.epage' : (str, ''), 'rft.pages' : (str, ''), 'rft.artnumber' : (str, ''), 'rft.issn' : (str, ''), 'rft.eissn' : (str, ''), 'rft.aulast' : (str, ''), 'rft.aufirst' : (str, ''), 'rft.auinit' : (str, ''), 'rft.auinit1' : (str, ''), 'rft.auinitm' : (str, ''), 'rft.ausuffix' : (str, ''), 'rft.au' : (list, []), 'rft.aucorp' : (str, ''), 'rft.isbn' : (str, ''), 'rft.coden' : (str, ''), 'rft.sici' : (str, ''), 'rft.genre' : (str, 'unknown'), 'rft.chron' : (str, ''), 'rft.ssn' : (str, ''), 'rft.quarter' : (int, ''), 'rft.part' : (str, ''), 'rft.btitle' : (str, ''), 'rft.isbn' : (str, ''), 'rft.atitle' : (str, ''), 'rft.place' : (str, ''), 'rft.pub' : (str, ''), 'rft.edition' : (str, ''), 'rft.tpages' : (str, ''), 'rft.series' : (str, ''), } tmpl_opensearch_rss_url_syntax = "%(CFG_SITE_URL)s/rss?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}" % {'CFG_SITE_URL': CFG_SITE_URL} tmpl_opensearch_html_url_syntax = "%(CFG_SITE_URL)s/search?p={searchTerms}&jrec={startIndex}&rg={count}&ln={language}" % {'CFG_SITE_URL': CFG_SITE_URL} def tmpl_openurl2invenio(self, openurl_data): """ Return an Invenio url corresponding to a search with the data included in the openurl form map. """ def isbn_to_isbn13_isbn10(isbn): isbn = isbn.replace(' ', '').replace('-', '') if len(isbn) == 10 and isbn.isdigit(): ## We already have isbn10 return ('', isbn) if len(isbn) != 13 and isbn.isdigit(): return ('', '') isbn13, isbn10 = isbn, isbn[3:-1] checksum = 0 weight = 10 for char in isbn10: checksum += int(char) * weight weight -= 1 checksum = 11 - (checksum % 11) if checksum == 10: isbn10 += 'X' if checksum == 11: isbn10 += '0' else: isbn10 += str(checksum) return (isbn13, isbn10) from invenio.search_engine import perform_request_search doi = '' pmid = '' bibcode = '' oai = '' issn = '' isbn = '' for elem in openurl_data['id']: if elem.startswith('doi:'): doi = elem[len('doi:'):] elif elem.startswith('pmid:'): pmid = elem[len('pmid:'):] elif elem.startswith('bibcode:'): bibcode = elem[len('bibcode:'):] elif elem.startswith('oai:'): oai = elem[len('oai:'):] for elem in openurl_data['rft_id']: if elem.startswith('info:doi/'): doi = elem[len('info:doi/'):] elif elem.startswith('info:pmid/'): pmid = elem[len('info:pmid/'):] elif elem.startswith('info:bibcode/'): bibcode = elem[len('info:bibcode/'):] elif elem.startswith('info:oai/'): oai = elem[len('info:oai/')] elif elem.startswith('urn:ISBN:'): isbn = elem[len('urn:ISBN:'):] elif elem.startswith('urn:ISSN:'): issn = elem[len('urn:ISSN:'):] ## Building author query aulast = openurl_data['rft.aulast'] or openurl_data['aulast'] aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst'] auinit = openurl_data['rft.auinit'] or \ openurl_data['auinit'] or \ openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm'] or \ openurl_data['auinit1'] + ' ' + openurl_data['auinitm'] or aufirst[:1] auinit = auinit.upper() if aulast and aufirst: author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit) elif aulast and auinit: author_query = 'author:"%s, %s"' % (aulast, auinit) else: author_query = '' ## Building title query title = openurl_data['rft.atitle'] or \ openurl_data['atitle'] or \ openurl_data['rft.btitle'] or \ openurl_data['rft.title'] or \ openurl_data['title'] if title: title_query = 'title:"%s"' % title title_query_cleaned = 'title:"%s"' % 
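## Standalone sketch of the ISBN-10 check-digit rule applied by
## isbn_to_isbn13_isbn10() above (assumes a plain-digit ISBN-13 string):
## the '978' prefix and the ISBN-13 check digit are dropped, then weights
## 10 down to 2 run over the nine remaining digits and the check digit is
## 11 - (sum mod 11), written as 'X' for 10 and '0' for 11.

def example_isbn13_to_isbn10(isbn13):
    payload = isbn13[3:-1]  # nine payload digits
    checksum = 0
    weight = 10
    for char in payload:
        checksum += int(char) * weight
        weight -= 1
    check = 11 - (checksum % 11)
    if check == 10:
        return payload + 'X'
    elif check == 11:
        return payload + '0'
    return payload + str(check)

## example_isbn13_to_isbn10('9780306406157') == '0306406152'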
_RE_SPACES.sub(' ', _RE_PUNCTUATION.sub(' ', title)) else: title_query = '' ## Building journal query jtitle = openurl_data['rft.stitle'] or \ openurl_data['stitle'] or \ openurl_data['rft.jtitle'] or \ openurl_data['title'] if jtitle: journal_query = 'journal:"%s"' % jtitle else: journal_query = '' ## Building isbn query isbn = isbn or openurl_data['rft.isbn'] or \ openurl_data['isbn'] isbn13, isbn10 = isbn_to_isbn13_isbn10(isbn) if isbn13: isbn_query = 'isbn:"%s" or isbn:"%s"' % (isbn13, isbn10) elif isbn10: isbn_query = 'isbn:"%s"' % isbn10 else: isbn_query = '' ## Building issn query issn = issn or openurl_data['rft.eissn'] or \ openurl_data['eissn'] or \ openurl_data['rft.issn'] or \ openurl_data['issn'] if issn: issn_query = 'issn:"%s"' % issn else: issn_query = '' ## Building coden query coden = openurl_data['rft.coden'] or openurl_data['coden'] if coden: coden_query = 'coden:"%s"' % coden else: coden_query = '' ## Building doi query if False: #doi: #FIXME Temporaly disabled until doi field is properly setup doi_query = 'doi:"%s"' % doi else: doi_query = '' ## Trying possible searches if doi_query: if perform_request_search(p=doi_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : doi_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if isbn_query: if perform_request_search(p=isbn_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : isbn_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if coden_query: if perform_request_search(p=coden_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : coden_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if author_query and title_query: if perform_request_search(p='%s and %s' % (title_query, author_query)): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : '%s and %s' % (title_query, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if title_query: result = len(perform_request_search(p=title_query)) if result == 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) elif result > 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'})) ## Nothing worked, let's return a search that the user can improve if author_query and title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : '%s and %s' % (title_query_cleaned, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) elif title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : title_query_cleaned, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) else: ## Mmh. Too few information provided. return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : 'recid:-1', 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) def tmpl_opensearch_description(self, ln): """ Returns the OpenSearch description file of this site. 
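## Sketch of the fallback order implemented by tmpl_openurl2invenio above
## (the helper name is illustrative): identifier queries are tried from the
## most to the least precise, and the first one that actually yields hits
## decides the redirect target.

def example_first_matching_query(queries):
    """Return the first query string that yields hits, else None."""
    from invenio.search_engine import perform_request_search
    for query in queries:  # e.g. [doi_query, isbn_query, coden_query]
        if query and perform_request_search(p=query):
            return query
    return None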
""" _ = gettext_set_language(ln) return """ %(short_name)s %(long_name)s %(description)s UTF-8 UTF-8 * %(CFG_SITE_ADMIN_EMAIL)s Powered by Invenio %(CFG_SITE_URL)s """ % \ {'CFG_SITE_URL': CFG_SITE_URL, 'short_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)[:16], 'long_name': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'description': (_("Search on %(x_CFG_SITE_NAME_INTL)s") % \ {'x_CFG_SITE_NAME_INTL': CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)})[:1024], 'CFG_SITE_ADMIN_EMAIL': CFG_SITE_ADMIN_EMAIL, 'rss_search_syntax': self.tmpl_opensearch_rss_url_syntax, 'html_search_syntax': self.tmpl_opensearch_html_url_syntax } def build_search_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search url. 'known_parameters' is the list of query parameters you inherit from your current query. You can then pass keyword arguments to modify this query. build_search_url(known_parameters, of="xm") The generated URL is absolute. """ parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] # Asking for a recid? Return a /CFG_SITE_RECORD/ URL if 'recid' in parameters: target = "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, parameters['recid']) del parameters['recid'] target += make_canonical_urlargd(parameters, self.search_results_default_urlargd) return target return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(parameters, self.search_results_default_urlargd)) def build_search_interface_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search interface URL.""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) c = parameters['c'] del parameters['c'] # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Treat `as' argument specially: if parameters.has_key('aas'): parameters['as'] = parameters['aas'] del parameters['aas'] if c and c != CFG_SITE_NAME: base = CFG_SITE_URL + '/collection/' + quote(c) else: base = CFG_SITE_URL return create_url(base, parameters) def build_rss_url(self, known_parameters, **kargs): """Helper for generating a canonical RSS URL""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Keep only interesting parameters argd = wash_urlargd(parameters, self.rss_default_urlargd) if argd: # Handle 'c' differently since it is a list c = argd.get('c', []) del argd['c'] # Create query, and drop empty params args = make_canonical_urlargd(argd, self.rss_default_urlargd) if c != []: # Add collections c = [quote(coll) for coll in c] if args == '': args += '?' 
else: args += '&' args += 'c=' + '&c='.join(c) return CFG_SITE_URL + '/rss' + args def tmpl_record_page_header_content(self, req, recid, ln): """ Provide extra information in the header of /CFG_SITE_RECORD pages """ _ = gettext_set_language(ln) title = get_fieldvalues(recid, "245__a") if title: title = cgi.escape(title[0]) else: title = _("Record") + ' #%d' % recid keywords = ', '.join(get_fieldvalues(recid, "6531_a")) description = ' '.join(get_fieldvalues(recid, "520__a")) description += "\n" description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a")) return [cgi.escape(x, True) for x in (title, description, keywords)] def tmpl_navtrail_links(self, aas, ln, dads): """ Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*) Parameters: - 'aas' *int* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'dads' *list* - A list of parent links, each one being a (url, name) pair """ out = [] for url, name in dads: args = {'c': url, 'as': aas, 'ln': ln} out.append(create_html_link(self.build_search_interface_url(**args), {}, cgi.escape(name), {'class': 'navtrail'})) return ' > '.join(out) def tmpl_webcoll_body(self, ln, collection, te_portalbox, searchfor, np_portalbox, narrowsearch, focuson, instantbrowse, ne_portalbox): """ Creates the body of the main search page. Parameters: - 'ln' *string* - language of the page being generated - 'collection' - collection id of the page being generated - 'te_portalbox' *string* - The HTML code for the portalbox on top of search - 'searchfor' *string* - The HTML code for the search for box - 'np_portalbox' *string* - The HTML code for the portalbox on bottom of search - 'narrowsearch' *string* - The HTML code for the search categories (left bottom of page) - 'focuson' *string* - The HTML code for the "focuson" categories (right bottom of page) - 'ne_portalbox' *string* - The HTML code for the bottom of the page """ if not narrowsearch: narrowsearch = instantbrowse body = '''
      %(searchfor)s %(np_portalbox)s ''' % { 'siteurl' : CFG_SITE_URL, 'searchfor' : searchfor, 'np_portalbox' : np_portalbox, 'narrowsearch' : narrowsearch, } if focuson: body += """""" body += """
      %(narrowsearch)s""" + focuson + """
      %(ne_portalbox)s
      """ % {'ne_portalbox' : ne_portalbox} return body def tmpl_portalbox(self, title, body): """Creates portalboxes based on the parameters Parameters: - 'title' *string* - The title of the box - 'body' *string* - The HTML code for the body of the box """ out = """
      %(title)s
      %(body)s
      """ % {'title' : cgi.escape(title), 'body' : body} return out def tmpl_searchfor_light(self, ln, collection_id, collection_name, record_count, example_search_queries): # EXPERIMENTAL """Produces light *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'example_search_queries' - *list* List of search queries given as example for this collection """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # Build example of queries for this collection example_search_queries_links = [create_html_link(self.build_search_url(p=example_query, ln=ln, aas= -1, c=collection_id), {}, cgi.escape(example_query), {'class': 'examplequery'}) \ for example_query in example_search_queries] example_query_html = '' if len(example_search_queries) > 0: example_query_link = example_search_queries_links[0] # offers more examples if possible more = '' if len(example_search_queries_links) > 1: more = ''' ''' % {'more_example_queries': '
      '.join(example_search_queries_links[1:]), 'show_less':_("less"), 'show_more':_("more")} example_query_html += '''

      %(example)s%(more)s

      ''' % {'example': _("Example: %(x_sample_search_query)s") % \ {'x_sample_search_query': example_query_link}, 'more': more} # display options to search in current collection or everywhere search_in = '' if collection_name != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in += ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': collection_name}, 'collection_id': collection_id, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} # print commentary start: out += ''' %(search_in)s ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips'), 'search_in': search_in, 'example_query_html': example_query_html} return out def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option): """Produces simple *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'record_count' - *str* Number of records in this collection - 'middle_option' *string* - HTML code for the options (any field, specific fields ...) """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # print commentary start: out += ''' ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'middle_option' : middle_option, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips')} return out def tmpl_searchfor_advanced(self, ln, # current language collection_id, collection_name, record_count, middle_option_1, middle_option_2, middle_option_3, searchoptions, sortoptions, rankoptions, displayoptions, formatoptions ): """ Produces advanced *Search for* box for the current collection. Parameters: - 'ln' *string* - The language to display - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...) - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...) - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...) 
- 'searchoptions' *string* - HTML code for the search options - 'sortoptions' *string* - HTML code for the sort options - 'rankoptions' *string* - HTML code for the rank options - 'displayoptions' *string* - HTML code for the display options - 'formatoptions' *string* - HTML code for the format options """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'aas': 1, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for") % \ self.tmpl_nbrecs_info(record_count, "", "") header += ':' ssearchurl = self.build_search_interface_url(c=collection_id, aas=min(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) out += ''' ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")), 'header' : header, 'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln), 'middle_option_1' : middle_option_1, 'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln), 'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln), 'middle_option_2' : middle_option_2, 'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln), 'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln), 'middle_option_3' : middle_option_3, 'msg_search' : _("Search"), 'msg_browse' : _("Browse"), 'msg_search_tips' : _("Search Tips")} if (searchoptions): out += """""" % { 'searchheader' : _("Search options:"), 'searchoptions' : searchoptions } out += """ """ % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(ln=ln), 'date_added' : self.tmpl_inputdate("d1", ln=ln), 'date_until' : self.tmpl_inputdate("d2", ln=ln), 'msg_sort' : _("Sort by:"), 'msg_display' : _("Display results:"), 'msg_format' : _("Output format:"), 'sortoptions' : sortoptions, 'rankoptions' : rankoptions, 'displayoptions' : displayoptions, 'formatoptions' : formatoptions } return out def tmpl_matchtype_box(self, name='m', value='', ln='en'): """Returns HTML code for the 'match type' selection box. Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("All of the words:"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("Any of the words:"), 'sele' : self.tmpl_is_selected('e', value), 'opte' : _("Exact phrase:"), 'selp' : self.tmpl_is_selected('p', value), 'optp' : _("Partial phrase:"), 'selr' : self.tmpl_is_selected('r', value), 'optr' : _("Regular expression:") } return out def tmpl_is_selected(self, var, fld): """ Checks if *var* and *fld* are equal, and if yes, returns ' selected="selected"'. Useful for select boxes. Parameters: - 'var' *string* - First value to compare - 'fld' *string* - Second value to compare """ if var == fld: return ' selected="selected"' else: return "" def tmpl_andornot_box(self, name='op', value='', ln='en'): """ Returns HTML code for the AND/OR/NOT selection box. 
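## Sketch of how tmpl_is_selected() drives select boxes such as
## tmpl_matchtype_box and tmpl_andornot_box (the option codes 'a'/'o'/'n'
## are the real ones used above; the loop and labels are illustrative):

def example_andornot_options(template, current_value):
    out = ''
    for code, label in [('a', 'AND'), ('o', 'OR'), ('n', 'AND NOT')]:
        out += '<option value="%s"%s>%s</option>' % \
               (code, template.tmpl_is_selected(code, current_value), label)
    return out  # only the option matching current_value is marked selected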
Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("AND"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("OR"), 'seln' : self.tmpl_is_selected('n', value), 'optn' : _("AND NOT") } return out def tmpl_inputdate(self, name, ln, sy=0, sm=0, sd=0): """ Produces *From Date*, *Until Date* kind of selection box. Suitable for search options. Parameters: - 'name' *string* - The base name of the produced selects - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ # month box += """ """ # year box += """ """ return box def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG): """ Produces input date type selection box to choose added-or-modified date search option. Parameters: - 'dt' *string - date type (c=created, m=modified) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ % { 'added': _("Added since:"), 'modified': _("Modified since:"), 'sel': self.tmpl_is_selected(dt, 'm'), } return box def tmpl_narrowsearch(self, aas, ln, type, father, has_grandchildren, sons, display_grandsons, grandsons): """ Creates list of collection descendants of type *type* under title *title*. If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes. Parameters: - 'aas' *bool* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'type' *string* - The type of the produced box (virtual collections or normal collections) - 'father' *collection* - The current collection - 'has_grandchildren' *bool* - If the current collection has grand children - 'sons' *list* - The list of the sub-collections (first level) - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display) - 'grandsons' *list* - The list of sub-collections (second level) """ # load the right message language _ = gettext_set_language(ln) title = {'r': _("Narrow by collection:"), 'v': _("Focus on:")}[type] if has_grandchildren: style_prolog = "" style_epilog = "" else: style_prolog = "" style_epilog = "" out = """""" % {'title' : title, 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} # iterate through sons: i = 0 for son in sons: out += """""" % {'name' : cgi.escape(son.name) } # hosted collections are checked by default only when configured so elif str(son.dbquery).startswith("hostedcollection:"): external_collection_engine = get_external_collection_engine(str(son.name)) if external_collection_engine and external_collection_engine.selected_by_default: out += """""" % {'name' : cgi.escape(son.name) } elif external_collection_engine and not external_collection_engine.selected_by_default: out += """""" % {'name' : cgi.escape(son.name) } else: # strangely, the external collection engine was never found. In that case, # why was the hosted collection here in the first place? out += """""" % {'name' : cgi.escape(son.name) } else: out += """""" % {'name' : cgi.escape(son.name) } else: out += '' out += """""" i += 1 out += "
      %(title)s
      """ % \ { 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} if type == 'r': if son.restricted_p() and son.restricted_p() != father.restricted_p(): out += """%(link)s%(recs)s """ % { 'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, aas=aas), {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog), 'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)} # the following prints the "external collection" arrow just after the name and # number of records of the hosted collection # 1) we might want to make the arrow work as an anchor to the hosted collection as well. # That would probably require a new separate function under invenio.urlutils # 2) we might want to place the arrow between the name and the number of records of the hosted collection # That would require to edit/separate the above out += ... if type == 'r': if str(son.dbquery).startswith("hostedcollection:"): out += """%(name)s""" % \ { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(son.name), } if son.restricted_p(): out += """ [%(msg)s] """ % { 'msg' : _("restricted") } if display_grandsons and len(grandsons[i]): # iterate trough grandsons: out += """
      """ for grandson in grandsons[i]: out += """ %(link)s%(nbrec)s """ % { 'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, aas=aas), {}, cgi.escape(grandson.get_name(ln))), 'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)} # the following prints the "external collection" arrow just after the name and # number of records of the hosted collection # Some relatives comments have been made just above if type == 'r': if str(grandson.dbquery).startswith("hostedcollection:"): out += """%(name)s""" % \ { 'siteurl' : CFG_SITE_URL, 'name' : cgi.escape(grandson.name), } out += """
      " return out def tmpl_searchalso(self, ln, engines_list, collection_id): _ = gettext_set_language(ln) box_name = _("Search also:") html = """
      """ % locals() for engine in engines_list: internal_name = engine.name name = _(internal_name) base_url = engine.base_url if external_collection_get_state(engine, collection_id) == 3: checked = ' checked="checked"' else: checked = '' html += """""" % \ { 'checked': checked, 'base_url': base_url, 'internal_name': internal_name, 'name': cgi.escape(name), 'id': "extSearch" + nmtoken_from_string(name), 'siteurl': CFG_SITE_URL, } html += """
      %(box_name)s
      %(name)s
      """ return html def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG): """ Return information on the number of records. Parameters: - 'number' *string* - The number of records - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be '(') - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be ')') """ if number is None: number = 0 if prolog is None: prolog = ''' (''' if epilog is None: epilog = ''')''' return prolog + self.tmpl_nice_number(number, ln) + epilog def tmpl_box_restricted_content(self, ln): """ Displays a box containing a *restricted content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This collection is restricted. If you are authorized to access it, please click on the Search button.") def tmpl_box_hosted_collection(self, ln): """ Displays a box containing a *hosted collection* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This is a hosted external collection. Please click on the Search button to see its content.") def tmpl_box_no_records(self, ln): """ Displays a box containing a *no content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This collection does not contain any document yet.") def tmpl_instant_browse(self, aas, ln, recids, more_link=None): """ Formats a list of records (given in the recids list) from the database. Parameters: - 'aas' *int* - Advanced Search interface or not (0 or 1) - 'ln' *string* - The language to display - 'recids' *list* - the list of records from the database - 'more_link' *string* - the "More..." link for the record. If not given, will not be displayed """ # load the right message language _ = gettext_set_language(ln) body = '''''' for recid in recids: body += ''' ''' % { 'recid': recid['id'], 'date': recid['date'], 'body': recid['body'] } body += "
      %(date)s %(body)s
      " if more_link: body += '
      ' + \ create_html_link(more_link, {}, '[>> %s]' % _("more")) + \ '
      ' return '''
      %(header)s
      %(body)s
      ''' % {'header' : _("Latest additions:"), 'body' : body, } def tmpl_searchwithin_select(self, ln, fieldname, selected, values): """ Produces 'search within' selection box for the current collection. Parameters: - 'ln' *string* - The language to display - 'fieldname' *string* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ out = '""" return out def tmpl_select(self, fieldname, values, selected=None, css_class=''): """ Produces a generic select box Parameters: - 'css_class' *string* - optional, a css class to display this select with - 'fieldname' *list* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ if css_class != '': class_field = ' class="%s"' % css_class else: class_field = '' out = '""" return out def tmpl_record_links(self, recid, ln, sf='', so='d', sp='', rm=''): """ Displays the *More info* and *Find similar* links for a record Parameters: - 'ln' *string* - The language to display - 'recid' *string* - the id of the displayed record """ # load the right message language _ = gettext_set_language(ln) out = '''
      %(detailed)s - %(similar)s''' % { 'detailed': create_html_link(self.build_search_url(recid=recid, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}), 'similar': create_html_link(self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln), {}, _("Similar records"), {'class': "moreinfo"})} if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recid) if num_timescited: out += ''' - %s ''' % \ create_html_link(self.build_search_url(p='refersto:recid:%d' % recid, sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"}) return out def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln): """ Displays the "HTML basic" format of a record Parameters: - 'authors' *list* - the authors (as strings) - 'dates' *list* - the dates of publication - 'rns' *list* - the quicknotes for the record - 'abstracts' *list* - the abstracts for the record - 'urls_u' *list* - URLs to the original versions of the record - 'urls_z' *list* - Not used """ out = "" for title in titles: out += "%(title)s " % { 'title' : cgi.escape(title) } if authors: out += " / " for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]: out += '%s ' % \ create_html_link(self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author)) if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD: out += "et al" for date in dates: out += " %s." % cgi.escape(date) for rn in rns: out += """ [%(rn)s]""" % {'rn' : cgi.escape(rn)} for abstract in abstracts: out += "
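## Sketch of the query patterns behind the per-record links built above
## (recid 42 and the Template instance are illustrative): "Similar records"
## re-runs the word-similarity ranker on the record itself, while the
## citation count links to records that refer to it.

def example_record_link_urls(template, recid=42, ln='en'):
    similar = template.build_search_url(p='recid:%d' % recid, rm='wrd', ln=ln)
    citedby = template.build_search_url(p='refersto:recid:%d' % recid, ln=ln)
    return similar, citedby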
      %(abstract)s [...]" % {'abstract' : cgi.escape(abstract[:1 + string.find(abstract, '.')]) } for idx in range(0, len(urls_u)): out += """
      %(name)s""" % { 'url' : urls_u[idx], 'name' : urls_u[idx] } return out def tmpl_search_in_bibwords(self, p, f, ln, nearest_box): """ Displays the *Words like current ones* links for a search Parameters: - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call """ # load the right message language _ = gettext_set_language(ln) out = '

      ' if f: out += _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {'x_word': '' + cgi.escape(p) + '', 'x_field': '' + cgi.escape(f) + ''} else: out += _("Words nearest to %(x_word)s in any collection are:") % {'x_word': '' + cgi.escape(p) + ''} out += '
      ' + nearest_box + '

      ' return out def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro): """ Displays the *Nearest search terms* box Parameters: - 'p' *string* - Current search words - 'f' *string* - a collection description (if the search has been completed in a collection) - 'ln' *string* - The language to display - 'terminfo': tuple (term, hits, argd) for each near term - 'intro' *string* - the intro HTML to prefix the box with """ out = '''''' for term, hits, argd in terminfo: if hits: hitsinfo = str(hits) else: hitsinfo = '-' term = cgi.escape(term) if term == p: # print search word for orientation: nearesttermsboxbody_class = "nearesttermsboxbodyselected" if hits > 0: term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearesttermsselected"}) else: nearesttermsboxbody_class = "nearesttermsboxbody" term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearestterms"}) out += '''\ ''' % {'hits': hitsinfo, 'nearesttermsboxbody_class': nearesttermsboxbody_class, 'term': term} out += "
      %(hits)s   %(term)s
      " return intro + "
      " + out + "
      " def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls, rg): """ Displays the *Nearest search terms* box Parameters: - 'f' *string* - field (*not* i18nized) - 'fn' *string* - field name (i18nized) - 'ln' *string* - The language to display - 'browsed_phrases_in_colls' *array* - the phrases to display - 'colls' *array* - the list of collection parameters of the search (c's) - 'rg' *int* - the number of records """ # load the right message language _ = gettext_set_language(ln) out = """""" % { 'hits' : _("Hits"), 'fn' : cgi.escape(fn) } if len(browsed_phrases_in_colls) == 1: # one hit only found: phrase, nbhits = browsed_phrases_in_colls[0][0], browsed_phrases_in_colls[0][1] query = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'nbhits': nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} elif len(browsed_phrases_in_colls) > 1: # first display what was found but the last one: for phrase, nbhits in browsed_phrases_in_colls[:-1]: query = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'nbhits' : nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} # now display last hit as "previous term": phrase, nbhits = browsed_phrases_in_colls[0] query_previous = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} # now display last hit as "next term": phrase, nbhits = browsed_phrases_in_colls[-1] query_next = {'c': colls, 'ln': ln, 'p': '"%s"' % phrase.replace('"', '\\"'), 'f': f, 'rg' : rg} out += """""" % {'link_previous': create_html_link(self.build_search_url(query_previous, action='browse'), {}, _("Previous")), 'link_next': create_html_link(self.build_search_url(query_next, action='browse'), {}, _("next")), 'siteurl' : CFG_SITE_URL} out += """
      %(hits)s   %(fn)s
      %(nbhits)s   %(link)s
      %(nbhits)s   %(link)s
        %(link_previous)s %(link_next)s
      """ return out def tmpl_search_box(self, ln, aas, cc, cc_intl, ot, sp, action, fieldslist, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, rm, p, f, coll_selects, d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields, sf, so, ranks, sc, rg, formats, of, pl, jrec, ec, show_colls=True, show_title=True): """ Displays the *Nearest search terms* box Parameters: - 'ln' *string* - The language to display - 'aas' *bool* - Should we display an advanced search box? -1 -> 1, from simpler to more advanced - 'cc_intl' *string* - the i18nized current collection name, used for display - 'cc' *string* - the internal current collection name - 'ot', 'sp' *string* - hidden values - 'action' *string* - the action demanded by the user - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, op3, rm' *strings* - the search parameters - 'coll_selects' *array* - a list of lists, each containing the collections selects to display - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* - the dates' types (creation dates, modification dates) - 'sort_fields' *array* - the select information for the sort fields - 'sf' *string* - the currently selected sort field - 'so' *string* - the currently selected sort order ("a" or "d") - 'ranks' *array* - ranking methods - 'rm' *string* - selected ranking method - 'sc' *string* - split by collection or not - 'rg' *string* - selected results/page - 'formats' *array* - available output formats - 'of' *string* - the selected output format - 'pl' *string* - `limit to' search pattern - show_colls *bool* - propose coll selection box? - show_title *bool* show cc_intl in page title? """ # load the right message language _ = gettext_set_language(ln) # These are hidden fields the user does not manipulate # directly if aas == -1: argd = drop_default_urlargd({ 'ln': ln, 'aas': aas, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) else: argd = drop_default_urlargd({ 'cc': cc, 'ln': ln, 'aas': aas, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) out = "" if show_title: # display cc name if asked for out += '''

      %(ccname)s

      ''' % {'ccname' : cgi.escape(cc_intl), } out += '''
      ''' % {'siteurl' : CFG_SITE_URL} # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) leadingtext = _("Search") if action == 'browse': leadingtext = _("Browse") if aas == 1: # print Advanced Search form: # define search box elements: out += ''' ''' % { 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg), {}, _("Simple Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln), 'p1' : cgi.escape(p1, 1), 'searchwithin1' : self.tmpl_searchwithin_select( ln=ln, fieldname='f1', selected=f1, values=self._add_mark_to_field(value=f1, fields=fieldslist, ln=ln) ), 'andornot1' : self.tmpl_andornot_box( name='op1', value=op1, ln=ln ), 'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln), 'p2' : cgi.escape(p2, 1), 'searchwithin2' : self.tmpl_searchwithin_select( ln=ln, fieldname='f2', selected=f2, values=self._add_mark_to_field(value=f2, fields=fieldslist, ln=ln) ), 'andornot2' : self.tmpl_andornot_box( name='op2', value=op2, ln=ln ), 'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln), 'p3' : cgi.escape(p3, 1), 'searchwithin3' : self.tmpl_searchwithin_select( ln=ln, fieldname='f3', selected=f3, values=self._add_mark_to_field(value=f3, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } elif aas == 0: # print Simple Search form: out += ''' ''' % { 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln=ln, fieldname='f', selected=f, values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } else: # EXPERIMENTAL # print light search form: search_in = '' if cc_intl != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in = ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': cgi.escape(cc_intl)}, 'collection_id': cc, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} out += ''' %(search_in)s ''' % { 'sizepattern' : CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH, 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, aas=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln=ln, fieldname='f', selected=f, values=self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips"), 'search_in': search_in } ## secondly, print Collection(s) box: if show_colls and aas > -1: # display collections only if there is more than one selects = '' for sel in coll_selects: selects += self.tmpl_select(fieldname='c', values=sel) out += """ """ % { 'leading' : leadingtext, 
'msg_coll' : _("collections"), 'colls' : selects, } ## thirdly, print search limits, if applicable: if action != _("Browse") and pl: out += """""" % { 'limitto' : _("Limit to:"), 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'pl' : cgi.escape(pl, 1), } ## fourthly, print from/until date boxen, if applicable: if action == _("Browse") or (d1y == 0 and d1m == 0 and d1d == 0 and d2y == 0 and d2m == 0 and d2d == 0): pass # do not need it else: cell_6_a = self.tmpl_inputdatetype(dt, ln) + self.tmpl_inputdate("d1", ln, d1y, d1m, d1d) cell_6_b = self.tmpl_inputdate("d2", ln, d2y, d2m, d2d) out += """""" % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(dt, ln), 'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d), 'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d), } ## fifthly, print Display results box, including sort/rank, formats, etc: if action != _("Browse") and aas > -1: rgs = [] for i in [10, 25, 50, 100, 250, 500]: if i <= CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS: rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))}) # enrich sort fields list if we are sorting by some MARC tag: sort_fields = self._add_mark_to_field(value=sf, fields=sort_fields, ln=ln) # create sort by HTML box: out += """""" % { 'sort_by' : _("Sort by:"), 'display_res' : _("Display results:"), 'out_format' : _("Output format:"), 'select_sf' : self.tmpl_select(fieldname='sf', values=sort_fields, selected=sf, css_class='address'), 'select_so' : self.tmpl_select(fieldname='so', values=[{ 'value' : 'a', 'text' : _("asc.") }, { 'value' : 'd', 'text' : _("desc.") }], selected=so, css_class='address'), 'select_rm' : self.tmpl_select(fieldname='rm', values=ranks, selected=rm, css_class='address'), 'select_rg' : self.tmpl_select(fieldname='rg', values=rgs, selected=rg, css_class='address'), 'select_sc' : self.tmpl_select(fieldname='sc', values=[{ 'value' : 0, 'text' : _("single list") }, { 'value' : 1, 'text' : _("split by collection") }], selected=sc, css_class='address'), 'select_of' : self.tmpl_select( fieldname='of', selected=of, values=self._add_mark_to_field(value=of, fields=formats, chars=3, ln=ln), css_class='address'), } ## last but not least, print end of search box: out += """
      """ return out def tmpl_input_hidden(self, name, value): "Produces the HTML code for a hidden field " if isinstance(value, list): list_input = [self.tmpl_input_hidden(name, val) for val in value] return "\n".join(list_input) # # Treat `as', `aas' arguments specially: if name == 'aas': name = 'as' return """""" % { 'name' : cgi.escape(str(name), 1), 'value' : cgi.escape(str(value), 1), } def _add_mark_to_field(self, value, fields, ln, chars=1): """Adds the current value as a MARC tag in the fields array Useful for advanced search""" # load the right message language _ = gettext_set_language(ln) out = fields if value and str(value[0:chars]).isdigit(): out.append({'value' : value, 'text' : str(value) + " " + _("MARC tag") }) return out def tmpl_search_pagestart(self, ln) : "page start for search page. Will display after the page header" return """
      """ def tmpl_search_pageend(self, ln) : "page end for search page. Will display just before the page footer" return """
      """ def tmpl_print_warning(self, msg, type, prologue, epilogue): """Prints warning message and flushes output. Parameters: - 'msg' *string* - The message string - 'type' *string* - the warning type - 'prologue' *string* - HTML code to display before the warning - 'epilogue' *string* - HTML code to display after the warning """ out = '\n%s' % (prologue) if type: out += '%s: ' % type out += '%s%s' % (msg, epilogue) return out def tmpl_print_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += '''
      ''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln), {}, cgi.escape(collection_name)) } else: out += """
      """ % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: out += """
      " else: out += "" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "
      %(collection_link)s %(recs_found)s  """ % { 'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') } else: out += "" if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') + "   " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'aas': aas, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '%(txt)s' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec - rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg), {}, img('sp', _("previous")), {'class': 'img'}) if jrec + rg - 1 < nb_found: out += "%d - %d" % (jrec, jrec + rg - 1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec + rg: out += create_html_link(self.build_search_url(query, jrec=jrec + rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec + rg + rg: out += create_html_link(self.build_search_url(query, jrec=nb_found - rg + 1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name=var, value=vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name=var, value=vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name=fieldcode, value=val) out += """  %(jump)s """ % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "%(time)s 
      " else: out += "" out += "
      " return out def tmpl_print_hosted_search_info(self, ln, middle_only, collection, collection_name, collection_id, aas, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'aas' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += '''
      ''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, aas=aas, ln=ln), {}, cgi.escape(collection_name)) } else: out += """
      """ % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: # in case we have a hosted collection that timed out do not print its number of records, as it is yet unknown if nb_found != -963: out += """
      " else: out += "" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "
%(collection_link)s %(recs_found)s  """ % { 'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') } #elif nb_found = -963: # out += """ # %(recs_found)s  """ % { # 'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') # } else: out += "" # we do not care about timed-out hosted collections here, because the number of records found will never be bigger # than rg anyway, since it's negative if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') + "   " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'aas': aas, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '%(txt)s' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec - rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec - rg, 1), rg=rg), {}, img('sp', _("previous")), {'class': 'img'}) if jrec + rg - 1 < nb_found: out += "%d - %d" % (jrec, jrec + rg - 1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec + rg: out += create_html_link(self.build_search_url(query, jrec=jrec + rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec + rg + rg: out += create_html_link(self.build_search_url(query, jrec=nb_found - rg + 1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'aas', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name=var, value=vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name=var, value=vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name=fieldcode, value=val) out += """  %(jump)s """ % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "%(time)s
      " else: out += "" out += "
      " return out def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None): """ Return nicely printed number NUMBER in language LN using given THOUSANDS_SEPARATOR character. If max_ndigits_after_dot is specified and the number is float, the number is rounded by taking in consideration up to max_ndigits_after_dot digit after the dot. This version does not pay attention to locale. See tmpl_nice_number_via_locale(). """ if type(number) is float: if max_ndigits_after_dot is not None: number = round(number, max_ndigits_after_dot) int_part, frac_part = str(number).split('.') return '%s.%s' % (self.tmpl_nice_number(int(int_part), ln, thousands_separator), frac_part) else: chars_in = list(str(number)) number = len(chars_in) chars_out = [] for i in range(0, number): if i % 3 == 0 and i != 0: chars_out.append(thousands_separator) chars_out.append(chars_in[number - i - 1]) chars_out.reverse() return ''.join(chars_out) def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG): """ Return nicely printed number NUM in language LN using the locale. See also version tmpl_nice_number(). """ if number is None: return None # Temporarily switch the numeric locale to the requested one, and format the number # In case the system has no locale definition, use the vanilla form ol = locale.getlocale(locale.LC_NUMERIC) try: locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale)) except locale.Error: return str(number) try: number = locale.format('%d', number, True) except TypeError: return str(number) locale.setlocale(locale.LC_NUMERIC, ol) return number def tmpl_record_format_htmlbrief_header(self, ln): """Returns the header of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_footer, tmpl_record_format_htmlbrief_body Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """
      """ % { 'siteurl' : CFG_SITE_URL, } return out def tmpl_record_format_htmlbrief_footer(self, ln, display_add_to_basket=True): """Returns the footer of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display - 'display_add_to_basket' *bool* - whether to display Add-to-basket button """ # load the right message language _ = gettext_set_language(ln) out = """

      %(add_to_basket)s
      """ % { 'add_to_basket': display_add_to_basket and """""" % _("Add to basket") or "", } return out def tmpl_record_format_htmlbrief_body(self, ln, recid, row_number, relevance, record, relevances_prologue, relevances_epilogue, display_add_to_basket=True): """Returns the html brief format of one record. Used in the search results list for each record. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_footer(..) Parameters: - 'ln' *string* - The language to display - 'row_number' *int* - The position of this record in the list - 'recid' *int* - The recID - 'relevance' *string* - The relevance of the record - 'record' *string* - The formatted record - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting) """ # load the right message language _ = gettext_set_language(ln) checkbox_for_baskets = """""" % \ {'recid': recid, } if not display_add_to_basket: checkbox_for_baskets = '' out = """ %(checkbox_for_baskets)s %(number)s. """ % {'recid': recid, 'number': row_number, 'checkbox_for_baskets': checkbox_for_baskets} if relevance: out += """
      """ % { 'prologue' : relevances_prologue, 'epilogue' : relevances_epilogue, 'relevance' : relevance } out += """%s""" % record return out def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec, hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below. Parameters: - 'ln' *string* - The language to display - 'results_final_nb_total' *int* - The total number of hits for the query - 'colls' *array* - The collections with hits, in the format: - 'coll[code]' *string* - The code of the collection (canonical name) - 'coll[name]' *string* - The display name of the collection - 'results_final_nb' *array* - The number of hits, indexed by the collection codes: - 'cpu_time' *string* - The time the query took - 'url_args' *string* - The rest of the search query - 'ec' *array* - selected external collections - 'hosted_colls_potential_results_p' *boolean* - check if there are any hosted collections searches that timed out during the pre-search """ if len(colls) == 1 and not ec: # if one collection only and no external collections, print nothing: return "" # load the right message language _ = gettext_set_language(ln) # first find total number of hits: # if there were no hosted collections that timed out during the pre-search print out the exact number of records found if not hosted_colls_potential_results_p: out = """''' % { 'more': create_html_link( self.build_search_url(p='refersto:recid:%d' % recID, #XXXX sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, _("more")), 'similar': similar} return out def tmpl_detailed_record_citations_citation_history(self, recID, ln, citationhistory): """Returns the citations history graph of this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citationhistory *string* - citationhistory box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None: out = '' % citationhistory else: out = "" else: out += "no citationhistory -->" return out def tmpl_detailed_record_citations_co_citing(self, recID, ln, cociting): """Returns the list of cocited records Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - cociting *string* - cociting box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("Co-cited with: %s records") % len (cociting), cociting, ln) out = ''' ''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln), {}, _("more")), 'similar': similar } return out def tmpl_detailed_record_citations_self_cited(self, recID, ln, selfcited, citinglist): """Returns the list of self-citations for this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - selfcited list - a list of self-citations for recID """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None: sc_scorelist = [] #a score list for print.. for s in selfcited: #copy weight from citations weight = 0 for c in citinglist: (crec, score) = c if crec == s: weight = score tmp = [s, weight] sc_scorelist.append(tmp) scite = self.tmpl_print_record_list_for_similarity_boxen ( _(".. 
of which self-citations: %s records") % len (selfcited), sc_scorelist, ln) out = '' return out def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict, citedbylist, kwtuples, authors, vtuples, names_dict, person_link, bibauthorid_data, ln, return_html=False): """Prints stuff about the author given as authorname. 1. Author name + his/her institutes. Each institute I has a link to papers where the author has I as institute. 2. Publications, number: link to search by author. 3. Keywords 4. Author collabs 5. Publication venues like journals The parameters are data structures needed to produce 1-5, as follows: req - request pubs - list of recids, probably the records that have the author as an author authorname - evident num_downloads - evident aff_pubdict - a dictionary where keys are inst names and values lists of recordids citedbylist - list of recs that cite pubs kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids authors - a list of authors that have collaborated with authorname names_dict - a dict of {name: frequency} """ from invenio.search_engine import perform_request_search from operator import itemgetter _ = gettext_set_language(ln) ib_pubs = intbitset(pubs) html = [] # construct an extended search as an interim solution for author id # searches. Will build "(exactauthor:v1 OR exactauthor:v2)" strings # extended_author_search_str = "" # if bibauthorid_data["is_baid"]: # if len(names_dict.keys()) > 1: # extended_author_search_str = '(' # # for name_index, name_query in enumerate(names_dict.keys()): # if name_index > 0: # extended_author_search_str += " OR " # # extended_author_search_str += 'exactauthor:"' + name_query + '"' # # if len(names_dict.keys()) > 1: # extended_author_search_str += ')' # rec_query = 'exactauthor:"' + authorname + '"' # # if bibauthorid_data["is_baid"] and extended_author_search_str: # rec_query = extended_author_search_str baid_query = "" extended_author_search_str = "" if 'is_baid' in bibauthorid_data and bibauthorid_data['is_baid']: if bibauthorid_data["cid"]: baid_query = 'author:%s' % bibauthorid_data["cid"] elif bibauthorid_data["pid"] > -1: baid_query = 'author:%s' % bibauthorid_data["pid"] ## todo: figure out if the author index is filled with pids/cids. ## if not: fall back to exactauthor search. # if not index: # baid_query = "" if not baid_query: baid_query = 'exactauthor:"' + authorname + '"' if bibauthorid_data['is_baid']: if len(names_dict.keys()) > 1: extended_author_search_str = '(' for name_index, name_query in enumerate(names_dict.keys()): if name_index > 0: extended_author_search_str += " OR " extended_author_search_str += 'exactauthor:"' + name_query + '"' if len(names_dict.keys()) > 1: extended_author_search_str += ')' if bibauthorid_data['is_baid'] and extended_author_search_str: baid_query = extended_author_search_str baid_query = baid_query + " " sorted_names_list = sorted(names_dict.iteritems(), key=itemgetter(1), reverse=True) # Prepare data for display # construct names box header = "" + _("Name variants") + "" content = [] for name, frequency in sorted_names_list: prquery = baid_query + ' exactauthor:"' + name + '"' name_lnk = create_html_link(self.build_search_url(p=prquery), {}, str(frequency),) content.append("%s (%s)" % (name, name_lnk)) if not content: content = [_("No Name Variants")] names_box = self.tmpl_print_searchresultbox(header, "
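# Illustrative sketch (not part of the original patch): the query assembled
# above prefers a stable author identifier and only falls back to name
# matching; when several name variants are known, they are ORed together.
# Same input shapes (bibauthorid_data dict, names_dict of name -> count):
def build_author_query(bibauthorid_data, authorname, names_dict):
    query = ''
    if bibauthorid_data.get('is_baid'):
        if bibauthorid_data.get('cid'):
            query = 'author:%s' % bibauthorid_data['cid']    # canonical id
        elif bibauthorid_data.get('pid', -1) > -1:
            query = 'author:%s' % bibauthorid_data['pid']    # numeric person id
    if not query:
        query = 'exactauthor:"%s"' % authorname
    names = list(names_dict.keys())
    if bibauthorid_data.get('is_baid') and len(names) > 1:
        # all known name variants of the person, ORed together
        query = '(' + ' OR '.join('exactauthor:"%s"' % n for n in names) + ')'
    return query
# build_author_query({'is_baid': False}, 'Ellis, J', {}) -> 'exactauthor:"Ellis, J"'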
      \n".join(content)) # construct papers box rec_query = baid_query searchstr = create_html_link(self.build_search_url(p=rec_query), {}, "" + "All papers (" + str(len(pubs)) + ")" + "",) line1 = "" + _("Papers") + "" line2 = searchstr if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads: line2 += " (" + _("downloaded") + " " line2 += str(num_downloads) + " " + _("times") + ")" if CFG_INSPIRE_SITE: CFG_COLLS = ['Book', 'Conference', 'Introductory', 'Lectures', 'Preprint', 'Published', 'Report', 'Review', 'Thesis'] else: CFG_COLLS = ['Article', 'Book', 'Preprint', ] collsd = {} for coll in CFG_COLLS: coll_papers = list(ib_pubs & intbitset(perform_request_search(f="collection", p=coll))) if coll_papers: collsd[coll] = coll_papers colls = collsd.keys() colls.sort(lambda x, y: cmp(len(collsd[y]), len(collsd[x]))) # sort by number of papers for coll in colls: rec_query = baid_query + 'collection:' + coll line2 += "
      " + create_html_link(self.build_search_url(p=rec_query), {}, coll + " (" + str(len(collsd[coll])) + ")",) if not pubs: line2 = _("No Papers") papers_box = self.tmpl_print_searchresultbox(line1, line2) #make a authoraff string that looks like CERN (1), Caltech (2) etc authoraff = "" aff_pubdict_keys = aff_pubdict.keys() aff_pubdict_keys.sort(lambda x, y: cmp(len(aff_pubdict[y]), len(aff_pubdict[x]))) if aff_pubdict_keys: for a in aff_pubdict_keys: print_a = a if (print_a == ' '): print_a = _("unknown affiliation") if authoraff: authoraff += '
      ' authoraff += create_html_link(self.build_search_url(p=' or '.join(["%s" % x for x in aff_pubdict[a]]), f='recid'), {}, print_a + ' (' + str(len(aff_pubdict[a])) + ')',) else: authoraff = _("No Affiliations") line1 = "" + _("Affiliations") + "" line2 = authoraff affiliations_box = self.tmpl_print_searchresultbox(line1, line2) # print frequent keywords: keywstr = "" if (kwtuples): for (kw, freq) in kwtuples: if keywstr: keywstr += '
      ' rec_query = baid_query + 'keyword:"' + kw + '"' searchstr = create_html_link(self.build_search_url(p=rec_query), {}, kw + " (" + str(freq) + ")",) keywstr = keywstr + " " + searchstr else: keywstr += _('No Keywords') line1 = "" + _("Frequent keywords") + "" line2 = keywstr keyword_box = self.tmpl_print_searchresultbox(line1, line2) header = "" + _("Frequent co-authors") + "" content = [] sorted_coauthors = sorted(sorted(authors.iteritems(), key=itemgetter(0)), key=itemgetter(1), reverse=True) for name, frequency in sorted_coauthors: rec_query = baid_query + 'exactauthor:"' + name + '"' lnk = create_html_link(self.build_search_url(p=rec_query), {}, "%s (%s)" % (name, frequency),) content.append("%s" % lnk) if not content: content = [_("No Frequent Co-authors")] coauthor_box = self.tmpl_print_searchresultbox(header, "
      \n".join(content)) pubs_to_papers_link = create_html_link(self.build_search_url(p=baid_query), {}, str(len(pubs))) display_name = "" try: display_name = sorted_names_list[0][0] except IndexError: display_name = " " headertext = ('

      %s (%s papers)

      ' % (display_name, pubs_to_papers_link)) if return_html: html.append(headertext) else: req.write(headertext) #req.write("

      %s

      " % (authorname)) if person_link: cmp_link = ('' % (CFG_SITE_URL, person_link, _("This is me. Verify my publication list."))) if return_html: html.append(cmp_link) else: req.write(cmp_link) if return_html: html.append("
      %(founds)s
      """ % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \ {'x_fmt_open': '', 'x_fmt_close': '', 'x_nb_records': '' + self.tmpl_nice_number(results_final_nb_total, ln) + '', 'x_nb_seconds': '%.2f' % cpu_time} } # if there were (only) hosted_collections that timed out during the pre-search print out a fuzzier message else: if results_final_nb_total == 0: out = """
      %(founds)s
      """ % { 'founds' : _("%(x_fmt_open)sResults overview%(x_fmt_close)s") % \ {'x_fmt_open': '', 'x_fmt_close': ''} } elif results_final_nb_total > 0: out = """
      %(founds)s
      """ % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found at least %(x_nb_records)s records in %(x_nb_seconds)s seconds.") % \ {'x_fmt_open': '', 'x_fmt_close': '', 'x_nb_records': '' + self.tmpl_nice_number(results_final_nb_total, ln) + '', 'x_nb_seconds': '%.2f' % cpu_time} } # then print hits per collection: for coll in colls: if results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] > 0: out += """ %(coll_name)s, %(number)s
      """ % \ {'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % \ ('' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '')} # the following is used for hosted collections that have timed out, # i.e. for which we don't know the exact number of results yet. elif results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] == -963: out += """ %(coll_name)s
      """ % \ {'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % \ ('' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '')} out += "
      " return out def tmpl_print_hosted_results(self, url_and_engine, ln, of=None, req=None, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Print results of a given search engine. """ _ = gettext_set_language(ln) #url = url_and_engine[0] engine = url_and_engine[1] #name = _(engine.name) db_id = get_collection_id(engine.name) #base_url = engine.base_url out = "" results = engine.parser.parse_and_get_results(None, of=of, req=req, limit=limit, parseonly=True) if len(results) != 0: if of == 'hb': out += """
      """ % { 'siteurl' : CFG_SITE_URL, 'col_db_id' : db_id, } else: if of == 'hb': out += """
      """ for result in results: out += result.html.replace('>Detailed record<', '>External record<').replace('>Similar records<', '>Similar external records<') if len(results) != 0: if of == 'hb': out += """

      """ % { 'basket' : _("Add to basket") } else: if of == 'hb': out += """
      """ # we have already checked if there are results or no, maybe the following if should be removed? if not results: if of.startswith("h"): out = _('No results found...') + '
' return out def tmpl_print_searchresultbox(self, header, body): """Print a nicely formatted box for search results.""" #_ = gettext_set_language(ln) out = '
      ' + header + '
      ' + body + '
      ' return out def tmpl_search_no_boolean_hits(self, ln, nearestterms): """No hits found, proposes alternative boolean queries Parameters: - 'ln' *string* - The language to display - 'nearestterms' *array* - Parts of the interface to display, in the format: - 'nearestterms[nbhits]' *int* - The resulting number of hits - 'nearestterms[url_args]' *string* - The search parameters - 'nearestterms[p]' *string* - The search terms """ # load the right message language _ = gettext_set_language(ln) out = _("Boolean query returned no hits. Please combine your search terms differently.") out += '''
      ''' for term, hits, argd in nearestterms: out += '''\ ''' % {'hits' : hits, 'link': create_html_link(self.build_search_url(argd), {}, cgi.escape(term), {'class': "nearestterms"})} out += """
      %(hits)s   %(link)s
      """ return out def tmpl_similar_author_names(self, authors, ln): """No hits found, proposes alternative boolean queries Parameters: - 'authors': a list of (name, hits) tuples - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' % { 'similar' : _("See also: similar author names") } for author, hits in authors: out += '''\ ''' % {'link': create_html_link( self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author), {'class':"google"}), 'nb' : hits} out += """
      %(similar)s
      %(nb)d %(link)s
      """ return out def tmpl_print_record_detailed(self, recID, ln): """Displays a detailed on-the-fly record Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ # okay, need to construct a simple "Detailed record" format of our own: out = "

       " # secondly, title: titles = get_fieldvalues(recID, "245__a") for title in titles: out += "

      %s

      " % cgi.escape(title) # thirdly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += "

      " for author in authors: out += '%s; ' % create_html_link(self.build_search_url( ln=ln, p=author, f='author'), {}, cgi.escape(author)) out += "

      " # fourthly, date of creation: dates = get_fieldvalues(recID, "260__c") for date in dates: out += "

      %s

      " % date # fifthly, abstract: abstracts = get_fieldvalues(recID, "520__a") for abstract in abstracts: out += """

      Abstract: %s

      """ % abstract # fifthly bis, keywords: keywords = get_fieldvalues(recID, "6531_a") if len(keywords): out += """

      Keyword(s):""" for keyword in keywords: out += '%s; ' % create_html_link( self.build_search_url(ln=ln, p=keyword, f='keyword'), {}, cgi.escape(keyword)) out += '

      ' # fifthly bis bis, published in: prs_p = get_fieldvalues(recID, "909C4p") prs_v = get_fieldvalues(recID, "909C4v") prs_y = get_fieldvalues(recID, "909C4y") prs_n = get_fieldvalues(recID, "909C4n") prs_c = get_fieldvalues(recID, "909C4c") for idx in range(0, len(prs_p)): out += """

      Publ. in: %s""" % prs_p[idx] if prs_v and prs_v[idx]: out += """%s""" % prs_v[idx] if prs_y and prs_y[idx]: out += """(%s)""" % prs_y[idx] if prs_n and prs_n[idx]: out += """, no.%s""" % prs_n[idx] if prs_c and prs_c[idx]: out += """, p.%s""" % prs_c[idx] out += """.

      """ # sixthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") # we separate the fulltext links and image links for url_u in urls_u: if url_u.endswith('.png'): continue else: link_text = "URL" try: if urls_z[idx]: link_text = urls_z[idx] except IndexError: pass out += """

      %s: %s

      """ % (link_text, urls_u[idx], urls_u[idx]) # print some white space at the end: out += "

      " return out def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG): """Print list of records in the "hs" (HTML Similarity) format for similarity boxes. RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc. """ from invenio.search_engine import print_record, record_public_p recID_score_list_to_be_printed = [] # firstly find 5 first public records to print: nb_records_to_be_printed = 0 nb_records_seen = 0 while nb_records_to_be_printed < 5 and nb_records_seen < len(recID_score_list) and nb_records_seen < 50: # looking through first 50 records only, picking first 5 public ones (recID, score) = recID_score_list[nb_records_seen] nb_records_seen += 1 if record_public_p(recID): nb_records_to_be_printed += 1 recID_score_list_to_be_printed.append([recID, score]) # secondly print them: out = '''
      %(title)s
      ''' % { 'title': cgi.escape(title) } for recid, score in recID_score_list_to_be_printed: out += ''' ''' % { 'score': score, 'info' : print_record(recid, format="hs", ln=ln), } out += """
      (%(score)s)  %(info)s
      """ return out def tmpl_print_record_brief(self, ln, recID): """Displays a brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ out = "" # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_fieldvalues(recID, "245__a") # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") # thirdly, date of creation: dates = get_fieldvalues(recID, "260__c") # thirdly bis, report numbers: rns = get_fieldvalues(recID, "037__a") rns = get_fieldvalues(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_fieldvalues(recID, "520__a") # fifthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") # get rid of images images = [] non_image_urls_u = [] for url_u in urls_u: if url_u.endswith('.png'): images.append(url_u) else: non_image_urls_u.append(url_u) ## unAPI identifier out = '\n' % recID out += self.tmpl_record_body( titles=titles, authors=authors, dates=dates, rns=rns, abstracts=abstracts, urls_u=non_image_urls_u, urls_z=urls_z, ln=ln) return out def tmpl_print_record_brief_links(self, ln, recID, sf='', so='d', sp='', rm='', display_claim_link=False): """Displays links for brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments # load the right message language _ = gettext_set_language(ln) out = '
      ' if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: alephsysnos = get_fieldvalues(recID, "970__a") if len(alephsysnos) > 0: alephsysno = alephsysnos[0] out += '%s' % \ create_html_link(self.build_search_url(recid=alephsysno, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) out += ' - %s' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="wrd", ln=ln), {}, _("Similar records"), {'class': "moreinfo"}) if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recID) if num_timescited: out += ' - %s' % \ create_html_link(self.build_search_url(p="refersto:recid:%d" % recID, sf=sf, so=so, sp=sp, rm=rm, ln=ln), {}, num_timescited > 1 and _("Cited by %i records") % num_timescited or _("Cited by 1 record"), {'class': "moreinfo"}) else: out += "" if display_claim_link: #Maybe we want not to show the link to who cannot use id? out += ' - %s' % \ create_html_link(CFG_SITE_URL + '/person/action', {'claim':'True', 'selection':str(recID)}, 'Attribute this paper', {'class': "moreinfo"}) if CFG_WEBCOMMENT_ALLOW_COMMENTS and CFG_WEBSEARCH_SHOW_COMMENT_COUNT: num_comments = get_nb_comments(recID, count_deleted=False) if num_comments: out += ' - %s' % \ create_html_link(CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recID) + '/comments?ln=%s' % ln, {}, num_comments > 1 and _("%i comments") % (num_comments) or _("1 comment"), {'class': "moreinfo"}) else: out += "" if CFG_WEBCOMMENT_ALLOW_REVIEWS and CFG_WEBSEARCH_SHOW_REVIEW_COUNT: num_reviews = get_nb_reviews(recID, count_deleted=False) if num_reviews: out += ' - %s' % \ create_html_link(CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recID) + '/reviews?ln=%s' % ln, {}, num_reviews > 1 and _("%i reviews") % (num_reviews) or _("1 review"), {'class': "moreinfo"}) else: out += "" out += '
      ' return out def tmpl_xml_rss_prologue(self, current_url=None, previous_url=None, next_url=None, first_url=None, last_url=None, nb_found=None, jrec=None, rg=None, cc=None): """Creates XML RSS 2.0 prologue.""" title = CFG_SITE_NAME description = '%s latest documents' % CFG_SITE_NAME if cc and cc != CFG_SITE_NAME: title += ': ' + cgi.escape(cc) description += ' in ' + cgi.escape(cc) out = """ %(rss_title)s %(siteurl)s %(rss_description)s %(sitelang)s %(timestamp)s Invenio %(version)s %(sitesupportemail)s %(timetolive)s%(previous_link)s%(next_link)s%(current_link)s%(total_results)s%(start_index)s%(items_per_page)s %(siteurl)s/img/site_logo_rss.png %(sitename)s %(siteurl)s \n""" return out def tmpl_xml_podcast_prologue(self, current_url=None, previous_url=None, next_url=None, first_url=None, last_url=None, nb_found=None, jrec=None, rg=None, cc=None): """Creates XML podcast prologue.""" title = CFG_SITE_NAME description = '%s latest documents' % CFG_SITE_NAME if CFG_CERN_SITE: title = 'CERN' description = 'CERN latest documents' if cc and cc != CFG_SITE_NAME: title += ': ' + cgi.escape(cc) description += ' in ' + cgi.escape(cc) out = """ %(podcast_title)s %(siteurl)s %(podcast_description)s %(sitelang)s %(timestamp)s Invenio %(version)s %(siteadminemail)s %(timetolive)s%(previous_link)s%(next_link)s%(current_link)s %(siteurl)s/img/site_logo_rss.png %(sitename)s %(siteurl)s %(siteadminemail)s """ % {'sitename': CFG_SITE_NAME, 'siteurl': CFG_SITE_URL, 'sitelang': CFG_SITE_LANG, 'siteadminemail': CFG_SITE_ADMIN_EMAIL, 'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()), 'version': CFG_VERSION, 'sitesupportemail': CFG_SITE_SUPPORT_EMAIL, 'timetolive': CFG_WEBSEARCH_RSS_TTL, 'current_link': (current_url and \ '\n\n' % current_url) or '', 'previous_link': (previous_url and \ '\n' % previous_url) or '', 'next_link': (next_url and \ '\n \n""" return out def tmpl_xml_nlm_prologue(self): """Creates XML NLM prologue.""" out = """\n""" return out def tmpl_xml_nlm_epilogue(self): """Creates XML NLM epilogue.""" out = """\n""" return out def tmpl_xml_refworks_prologue(self): """Creates XML RefWorks prologue.""" out = """\n""" return out def tmpl_xml_refworks_epilogue(self): """Creates XML RefWorks epilogue.""" out = """\n""" return out def tmpl_xml_endnote_prologue(self): """Creates XML EndNote prologue.""" out = """\n""" return out def tmpl_xml_endnote_epilogue(self): """Creates XML EndNote epilogue.""" out = """\n""" return out def tmpl_xml_marc_prologue(self): """Creates XML MARC prologue.""" out = """\n""" return out def tmpl_xml_marc_epilogue(self): """Creates XML MARC epilogue.""" out = """\n""" return out def tmpl_xml_mods_prologue(self): """Creates XML MODS prologue.""" out = """\n""" return out def tmpl_xml_mods_epilogue(self): """Creates XML MODS epilogue.""" out = """\n""" return out def tmpl_xml_default_prologue(self): """Creates XML default format prologue. (Sanity calls only.)""" out = """\n""" return out def tmpl_xml_default_epilogue(self): """Creates XML default format epilogue. (Sanity calls only.)""" out = """\n""" return out def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG): """ Create page title for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = _("Collection %s Not Found") % cgi.escape(colname) return out def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG): """ Create page body for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = """

      %(title)s

      %(sorry)s

      %(you_may_want)s

      """ % { 'title': self.tmpl_collection_not_found_page_title(colname, ln), 'sorry': _("Sorry, collection %s does not seem to exist.") % \ ('' + cgi.escape(colname) + ''), 'you_may_want': _("You may want to start browsing from %s.") % \ ('' + \ cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '')} return out def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln, display_email_alert_part=True): """Propose teaser for setting up this query as alert or RSS feed. Parameters: - 'id_query' *int* - ID of the query we make teaser for - 'ln' *string* - The language to display - 'display_email_alert_part' *bool* - whether to display email alert part """ # load the right message language _ = gettext_set_language(ln) # get query arguments: res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,)) argd = {} if res: argd = cgi.parse_qs(res[0][0]) rssurl = self.build_rss_url(argd) alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&idq=%s' % (ln, id_query) if display_email_alert_part: msg_alert = _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \ {'x_url1_open': ' ' % (alerturl, CFG_SITE_URL) + ' ' % (alerturl), 'x_url1_close': '', 'x_url2_open': ' ' % (rssurl, CFG_SITE_URL) + ' ' % rssurl, 'x_url2_close': '', } else: msg_alert = _("""Subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \ {'x_url2_open': ' ' % (rssurl, CFG_SITE_URL) + ' ' % rssurl, 'x_url2_close': '', } out = '''
      %(similar)s
      %(msg_alert)s
''' % { 'similar' : _("Interested in being notified about new results for this query?"), 'msg_alert': msg_alert, } return out def tmpl_detailed_record_metadata(self, recID, ln, format, content, creationdate=None, modificationdate=None): """Returns the main detailed page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'format' *string* - The format used to print the record - 'content' *string* - The main content of the page - 'creationdate' *string* - The creation date of the printed record - 'modificationdate' *string* - The last modification date of the printed record """ _ = gettext_set_language(ln) ## unAPI identifier out = '\n' % recID out += content return out def tmpl_record_plots(self, recID, ln): """ Displays little tables containing the images and captions contained in the specified document. Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ from invenio.search_engine import get_record from invenio.bibrecord import field_get_subfield_values from invenio.bibrecord import record_get_field_instances _ = gettext_set_language(ln) out = '' rec = get_record(recID) flds = record_get_field_instances(rec, '856', '4') images = [] for fld in flds: image = field_get_subfield_values(fld, 'u') caption = field_get_subfield_values(fld, 'y') if type(image) == list and len(image) > 0: image = image[0] else: continue if type(caption) == list and len(caption) > 0: caption = caption[0] else: continue if not image.endswith('.png'): # huh? continue if len(caption) >= 5: images.append((int(caption[:5]), image, caption[5:])) else: # we don't have any idea of the order... just put it at the end images.append((99999, image, caption)) images = sorted(images, key=lambda x: x[0]) for (index, image, caption) in images: # let's put everything in nice little subtables with the image # next to the caption out = out + '' + \ '' + \ '' + \ '
      ' + \ '' + caption + '
      ' out = out + '

      ' return out def tmpl_detailed_record_statistics(self, recID, ln, downloadsimilarity, downloadhistory, viewsimilarity): """Returns the statistics page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - downloadsimilarity *string* - downloadsimilarity box - downloadhistory *string* - downloadhistory box - viewsimilarity *string* - viewsimilarity box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("People who downloaded this document also downloaded:"), downloadsimilarity, ln) out = '' out += ''' ''' % { 'siteurl': CFG_SITE_URL, 'recid': recID, 'ln': ln, 'similar': similar, 'more': _("more"), 'graph': downloadsimilarity } out += '
      %(graph)s
      %(similar)s
      ' out += '
      ' if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None: out += self.tmpl_print_record_list_for_similarity_boxen ( _("People who viewed this page also viewed:"), viewsimilarity, ln) if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None: out += downloadhistory + '
      ' return out def tmpl_detailed_record_citations_prologue(self, recID, ln): """Returns the prologue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '' def tmpl_detailed_record_citations_epilogue(self, recID, ln): """Returns the epilogue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '
      ' def tmpl_detailed_record_citations_citing_list(self, recID, ln, citinglist, sf='', so='d', sp='', rm=''): """Returns the list of record citing this one Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None: similar = self.tmpl_print_record_list_for_similarity_boxen( _("Cited by: %s records") % len (citinglist), citinglist, ln) out += '''
      %(similar)s %(more)s

      %s
      %(similar)s %(more)s
      ' + scite + '
      ") html.append("") html.append("
      ") html.append(names_box) html.append("
      ") html.append(papers_box) html.append("
      ") html.append(keyword_box) html.append("
       ") html.append(affiliations_box) html.append("
      ") html.append(coauthor_box) html.append("
      ") else: req.write("") req.write("") req.write("
      ") req.write(names_box) req.write("
      ") req.write(papers_box) req.write("
      ") req.write(keyword_box) req.write("
       ") req.write(affiliations_box) req.write("
      ") req.write(coauthor_box) req.write("
      ") # print citations: rec_query = baid_query if len(citedbylist): line1 = "" + _("Citations:") + "" line2 = "" if not pubs: line2 = _("No Citation Information available") sr_box = self.tmpl_print_searchresultbox(line1, line2) if return_html: html.append(sr_box) else: req.write(sr_box) if return_html: return "\n".join(html) # print frequent co-authors: # collabstr = "" # if (authors): # for c in authors: # c = c.strip() # if collabstr: # collabstr += '
      ' # #do not add this person him/herself in the list # cUP = c.upper() # authornameUP = authorname.upper() # if not cUP == authornameUP: # commpubs = intbitset(pubs) & intbitset(perform_request_search(p="exactauthor:\"%s\" exactauthor:\"%s\"" % (authorname, c))) # collabstr = collabstr + create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" exactauthor:"' + c + '"'), # {}, c + " (" + str(len(commpubs)) + ")",) # else: collabstr += 'None' # banner = self.tmpl_print_searchresultbox("" + _("Frequent co-authors:") + "", collabstr) # print frequently publishes in journals: #if (vtuples): # pubinfo = "" # for t in vtuples: # (journal, num) = t # pubinfo += create_html_link(self.build_search_url(p='exactauthor:"' + authorname + '" ' + \ # 'journal:"' + journal + '"'), # {}, journal + " ("+str(num)+")
      ") # banner = self.tmpl_print_searchresultbox("" + _("Frequently publishes in:") + "", pubinfo) # req.write(banner) def tmpl_detailed_record_references(self, recID, ln, content): """Returns the discussion page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'content' *string* - The main content of the page """ # load the right message language _ = gettext_set_language(ln) out = '' if content is not None: out += content return out def tmpl_citesummary_prologue(self, d_total_recs, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, prologue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """

      """ % \ {'msg_title': _("Citation summary results"), } for coll, colldef in l_colls: out += '' % coll out += '' out += """""" % \ {'msg_recs': _("Total number of citable papers analyzed:"), } for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) if colldef: link_url += '%20AND%20' + quote(colldef) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_total_recs[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_overview(self, d_total_cites, d_avg_cites, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, overview. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'msg_cites': _("Total number of citations:"), } for coll, colldef in l_colls: out += '' % self.tmpl_nice_number(d_total_cites[coll], ln) out += '' out += """""" % \ {'msg_avgcit': _("Average citations per paper:"), } for coll, colldef in l_colls: out += '' % d_avg_cites[coll] out += '' out += """""" % \ {'msg_breakdown': _("Breakdown of papers by citations:"), } return out def tmpl_citesummary_breakdown_by_fame(self, d_cites, low, high, fame, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, breakdown by fame. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'fame': fame, } for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) + '%20AND%20' if colldef: link_url += quote(colldef) + '%20AND%20' if low == 0 and high == 0: link_url += quote('cited:0') else: link_url += quote('cited:%i->%i' % (low, high)) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_cites[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_h_index(self, d_h_factors, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, h factor output. A part of the HCS suite.""" _ = gettext_set_language(ln) out = "" % \ {'msg_additional': _("Additional Citation Metrics"), 'help_url': CFG_SITE_URL + '/help/citation-metrics', } out += '' for coll, colldef in l_colls: out += '' % self.tmpl_nice_number(d_h_factors[coll], ln) out += '' return out def tmpl_citesummary_epilogue(self, ln=CFG_SITE_LANG): """HTML citesummary format, epilogue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """
      %(msg_title)s%s
      %(msg_recs)s%s
      %(msg_cites)s%s
      %(msg_avgcit)s%.1f
      %(msg_breakdown)s
      %(fame)s%s
      %(msg_additional)s [?]
      h-index [' % (CFG_SITE_URL + '/help/citation-metrics#citesummary_h-index') out += '?]%s
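# Illustrative sketch (not part of the original patch): the citesummary
# cells above link every count back to a search. The URL is assembled from
# the search pattern, an optional field, an optional collection definition
# and an optional citation range, each piece percent-encoded with quote():
from urllib import quote  # Python 2, as in this module

def citesummary_link(site_url, pattern, field=None, colldef=None,
                     low=None, high=None):
    if field:
        if ' ' in pattern:
            p = '%s:"%s"' % (field, pattern)  # quote multi-word patterns
        else:
            p = '%s:%s' % (field, pattern)
    else:
        p = pattern
    url = site_url + '/search?p=' + quote(p)
    if colldef:
        url += '%20AND%20' + quote(colldef)
    if low is not None and high is not None:
        url += '%20AND%20' + quote('cited:%i->%i' % (low, high))
    return url + '&rm=citation'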
      """ return out def tmpl_unapi(self, formats, identifier=None): """ Provide a list of object format available from the unAPI service for the object identified by IDENTIFIER """ out = '\n' if identifier: out += '\n' % (identifier) else: out += "\n" for format_name, format_type in formats.iteritems(): docs = '' if format_name == 'xn': docs = 'http://www.nlm.nih.gov/databases/dtd/' format_type = 'application/xml' format_name = 'nlm' elif format_name == 'xm': docs = 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd' format_type = 'application/xml' format_name = 'marcxml' elif format_name == 'xr': format_type = 'application/rss+xml' docs = 'http://www.rssboard.org/rss-2-0/' elif format_name == 'xw': format_type = 'application/xml' docs = 'http://www.refworks.com/RefWorks/help/RefWorks_Tagged_Format.htm' elif format_name == 'xoaidc': format_type = 'application/xml' docs = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd' elif format_name == 'xe': format_type = 'application/xml' docs = 'http://www.endnote.com/support/' format_name = 'endnote' elif format_name == 'xd': format_type = 'application/xml' docs = 'http://dublincore.org/schemas/' format_name = 'dc' elif format_name == 'xo': format_type = 'application/xml' docs = 'http://www.loc.gov/standards/mods/v3/mods-3-3.xsd' format_name = 'mods' if docs: out += '\n' % (xml_escape(format_name), xml_escape(format_type), xml_escape(docs)) else: out += '\n' % (xml_escape(format_name), xml_escape(format_type)) out += "" return out diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py index 86984425c..cbaa1c80f 100644 --- a/modules/websearch/lib/websearch_webinterface.py +++ b/modules/websearch/lib/websearch_webinterface.py @@ -1,1878 +1,1878 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebSearch URL handler.""" __revision__ = "$Id$" import cgi import os import datetime import time import sys from urllib import quote from invenio import webinterface_handler_config as apache import threading #maximum number of collaborating authors etc shown in GUI MAX_COLLAB_LIST = 10 MAX_KEYWORD_LIST = 10 MAX_VENUE_LIST = 10 #tag constants AUTHOR_TAG = "100__a" AUTHOR_INST_TAG = "100__u" COAUTHOR_TAG = "700__a" COAUTHOR_INST_TAG = "700__u" VENUE_TAG = "909C4p" KEYWORD_TAG = "695__a" FKEYWORD_TAG = "6531_a" CFG_INSPIRE_UNWANTED_KEYWORDS_START = ['talk', 'conference', 'conference proceedings', 'numerical calculations', 'experimental results', 'review', 'bibliography', 'upper limit', 'lower limit', 'tables', 'search for', 'on-shell', 'off-shell', 'formula', 'lectures', 'book', 'thesis'] CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE = ['GeV', '(('] if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_NAME, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBDIR, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_RSS_I18N_COLLECTIONS, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_SITE_RECORD from invenio.dbquery import Error from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd from invenio.htmlutils import get_mathjax_header from invenio.htmlutils import nmtoken_from_string from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \ collect_user_info, logoutUser, isUserSuperAdmin from invenio.websubmit_webinterface import WebInterfaceFilesPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.bibcirculation_webinterface import WebInterfaceHoldingsPages from invenio.webpage import page, pageheaderonly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine import check_user_can_view_record, \ collection_reclist_cache, \ collection_restricted_p, \ create_similarly_named_authors_link_box, \ get_colID, \ get_coll_i18nname, \ - get_fieldvalues, \ get_fieldvalues_alephseq_like, \ get_most_popular_field_values, \ get_mysql_recid_from_aleph_sysno, \ guess_primary_collection_of_a_record, \ page_end, \ page_start, \ perform_request_cache, \ perform_request_log, \ perform_request_search, \ restricted_collection_cache, \ get_coll_normalised_name +from invenio.search_engine_utils import get_fieldvalues from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.bibformat import format_records from invenio.bibformat_engine import get_output_formats from invenio.websearch_webcoll import get_collection from invenio.intbitset import intbitset from invenio.bibupload import find_record_from_sysno from invenio.bibrank_citation_searcher import get_cited_by_list from invenio.bibrank_downloads_indexer import get_download_weight_total from invenio.search_engine_summarizer import summarize_records from invenio.errorlib import 
register_exception from invenio.bibedit_webinterface import WebInterfaceEditPages from invenio.bibeditmulti_webinterface import WebInterfaceMultiEditPages from invenio.bibmerge_webinterface import WebInterfaceMergePages from invenio.search_engine import get_record from invenio.shellutils import mymkdir import invenio.template websearch_templates = invenio.template.load('websearch') search_results_default_urlargd = websearch_templates.search_results_default_urlargd search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd try: output_formats = [output_format['attrs']['code'].lower() for output_format in \ get_output_formats(with_attributes=True).values()] except KeyError: output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx'] output_formats.extend(['hm', 't', 'h']) def wash_search_urlargd(form): """ Create canonical search arguments from those passed via web form. """ argd = wash_urlargd(form, search_results_default_urlargd) if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # Sometimes, users pass ot=245,700 instead of # ot=245&ot=700. Normalize that. ots = [] for ot in argd['ot']: ots += ot.split(',') argd['ot'] = ots # We can either get the mode of function as # action=, or by setting action_browse or # action_search. if argd['action_browse']: argd['action'] = 'browse' elif argd['action_search']: argd['action'] = 'search' else: if argd['action'] not in ('browse', 'search'): argd['action'] = 'search' del argd['action_browse'] del argd['action_search'] return argd class WebInterfaceUnAPIPages(WebInterfaceDirectory): """ Handle /unapi set of pages.""" _exports = [''] def __call__(self, req, form): argd = wash_urlargd(form, { 'id' : (int, 0), 'format' : (str, '')}) formats_dict = get_output_formats(True) formats = {} for format in formats_dict.values(): if format['attrs']['visibility']: formats[format['attrs']['code'].lower()] = format['attrs']['content_type'] del formats_dict if argd['id'] and argd['format']: ## Translate back common format names format = { 'nlm' : 'xn', 'marcxml' : 'xm', 'dc' : 'xd', 'endnote' : 'xe', 'mods' : 'xo' }.get(argd['format'], argd['format']) if format in formats: redirect_to_url(req, '%s/%s/%s/export/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, argd['id'], format)) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_ACCEPTABLE elif argd['id']: return websearch_templates.tmpl_unapi(formats, identifier=argd['id']) else: return websearch_templates.tmpl_unapi(formats) index = __call__ class WebInterfaceAuthorPagesCacheUpdater(threading.Thread): ''' Handle asynchronous cache updates in the background as a loose Thread. ''' def __init__(self, req, form, identifier, current_cache): threading.Thread.__init__(self) self.req = req self.form = form self.identifier = identifier self.current_cache = current_cache def run(self): webint = WebInterfaceAuthorPages() webint.pageparam = self.identifier c = datetime.datetime.now() - self.current_cache[4] delay = (c.microseconds + (c.seconds + c.days * 24 * 3600) * 10 ** 6) / 10 ** 6 if delay < 3600 * 1: pass else: webint.update_cache_timestamp(self.identifier) page = webint.create_authorpage(self.req, self.form, return_html=True) webint.update_cache(self.identifier, page) class WebInterfaceAuthorPages(WebInterfaceDirectory): """ Handle /author/Doe%2C+John page requests as well as /author/: (e.g. /author/15:Doe%2C+John) requests. The latter will try to find a person from the personid universe and will display the joint information from that particular author cluster. 
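# Illustrative sketches (not part of the original patch).
# (1) wash_search_urlargd() above accepts both ot=245&ot=700 and the comma
#     form ot=245,700 and always hands the rest of the code a flat tag list:
def normalize_ot(ot_values):
    ots = []
    for ot in ot_values:           # each value may itself be comma-separated
        ots += ot.split(',')
    return ots
# normalize_ot(['245,700']) == normalize_ot(['245', '700']) == ['245', '700']

# (2) the delay computed in WebInterfaceAuthorPagesCacheUpdater.run() is the
#     pre-Python-2.7 spelling of timedelta.total_seconds(); the cached page
#     is recomputed in the background only once it is older than one hour:
def timedelta_to_seconds(delta):
    return (delta.microseconds
            + (delta.seconds + delta.days * 24 * 3600) * 10 ** 6) / 10 ** 6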
This interface will handle the following URLs: - /author/Doe%2C+John which will show information on the exactauthor search - /author/: (e.g. /author/15:Doe%2C+John) will try to find a person from the personid universe and will display the joint information from that particular author cluster. - /author/ (e.g. /author/152) will display the joint information from that particular author cluster (an entity called person). """ _exports = ['author'] def __init__(self, pageparam=''): """Constructor.""" self.pageparam = cgi.escape(pageparam.replace("+", " ")) self.personid = -1 self.authorname = " " self.person_data_available = False self.person_search_results = None self.cache_supported = False self.pt = None try: import bibauthorid_personid_tables_utils as pt self.cache_supported = True self.pt = pt except ImportError: self.cache_supported = False self.pt = None def _lookup(self, component, path): """This handler parses dynamic URLs (/author/John+Doe).""" return WebInterfaceAuthorPages(component), path def update_cache_timestamp(self, pageparam): ''' Update the cache timestamp to prevent multiple threads computing the same page at the same time ''' if not pageparam: return if not self.cache_supported: return self.pt.update_cached_author_page_timestamp(pageparam) def update_cache(self, pageparam, pagecontent): ''' Triggers the update to the DB @param pageparam: identifier for the cache @type pageparam: string @param pagecontent: content to write to cache @type pagecontent: string ''' #TABLE: id, tag, identifier, data, date if not pageparam: return if not pagecontent: return if not self.cache_supported: return self.pt.update_cached_author_page(pageparam, pagecontent) def __call__(self, req, form): ''' Cache manager for the author pages. #look up self.pageparam in cache table #if up to date return it #if not up to date: - # if exists: + # if exists: # return it and update # else: # create, update, return @param req: Apache request object @type req: Apache request object @param form: Parameters @type form: dict @return: HTML code for the author or author search page @rtype: string ''' argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0), 'recid': (int, -1) }) param_recid = argd['recid'] ln = argd['ln'] req.argd = argd #needed since perform_req_search _ = gettext_set_language(ln) title_message = "Author Details" page_content = "" is_bibauthorid = False try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_names_from_id from invenio.bibauthorid_webapi import get_person_db_names_from_id from invenio.bibauthorid_webapi import get_person_redirect_link from invenio.bibauthorid_webapi import is_valid_canonical_id from invenio.bibauthorid_webapi import get_personid_status_cacher from invenio.bibauthorid_utils import create_normalized_name from invenio.bibauthorid_utils import split_name_parts # from invenio.bibauthorid_config import CLAIMPAPER_CLAIM_OTHERS_PAPERS from invenio.bibauthorid_config import AID_ENABLED from invenio.bibauthorid_config import AID_ON_AUTHORPAGES bibauthorid_template = invenio.template.load('bibauthorid') import bibauthorid_personid_tables_utils as pt is_bibauthorid = True except ImportError: return self.create_authorpage(req, form) if not AID_ENABLED or not AID_ON_AUTHORPAGES: is_bibauthorid = False self.resolve_personid(param_recid) if self.personid > -1: identifier = self.personid else: identifier = self.pageparam cached_page =
pt.get_cached_author_page(identifier) if cached_page: page_content = cached_page[3] background = WebInterfaceAuthorPagesCacheUpdater(req, form, identifier, cached_page) background.start() else: pagecontent = self.create_authorpage(req, form, return_html=True) self.update_cache(identifier, pagecontent) page_content = pagecontent metaheaderadd = "" if is_bibauthorid: metaheaderadd = bibauthorid_template.tmpl_meta_includes() # Start the page in clean manner: req.content_type = "text/html" req.send_http_header() req.write(pageheaderonly(req=req, title=title_message, metaheaderadd=metaheaderadd, language=ln)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) req.write(page_content) return page_end(req, 'hb', ln) def resolve_personid(self, param_recid): ''' Resolves the Person ID from a given string. @param param_recid: record ID parameter @type param_recid: int ''' try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_id_from_canonical_id from invenio.bibauthorid_webapi import is_valid_canonical_id from invenio.bibauthorid_config import AID_ENABLED from invenio.bibauthorid_config import AID_ON_AUTHORPAGES # from invenio.access_control_admin import acc_find_user_role_actions if not AID_ENABLED or not AID_ON_AUTHORPAGES: is_bibauthorid = False else: is_bibauthorid = True except (ImportError): is_bibauthorid = False from operator import itemgetter authors = [] recid = None nquery = "" #check if it is a person id (e.g. 144): try: self.personid = int(self.pageparam) except (ValueError, TypeError): self.personid = -1 if self.personid > -1: return #check if it is a canonical ID (e.g. Ellis_J_1): if is_bibauthorid and is_valid_canonical_id(self.pageparam): try: self.personid = int(get_person_id_from_canonical_id(self.pageparam)) except (ValueError, TypeError): self.personid = -1 if self.personid < 0 and is_bibauthorid: if param_recid > -1: # Well, it's not a person id, did we get a record ID? recid = param_recid nquery = self.pageparam elif self.pageparam.count(":"): # No recid passed, maybe name is recid:name or name:recid pair? left, right = self.pageparam.split(":") try: recid = int(left) nquery = str(right) except (ValueError, TypeError): try: recid = int(right) nquery = str(left) except (ValueError, TypeError): recid = None nquery = self.pageparam else: # No recid could be determined. 
Work with name only nquery = self.pageparam sorted_results = search_person_ids_by_name(nquery) test_results = None if recid: for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) if (recid and not (str(recid) in [row[0] for row in authorpapers])): continue authors.append([results[0], results[1], authorpapers[0:4]]) test_results = authors else: test_results = [i for i in sorted_results if i[1][0][2] > .8] if len(test_results) == 1: self.personid = test_results[0][0] else: self.person_search_results = sorted_results def create_authorpage(self, req, form, return_html=False): ''' Creates an author page in a given language If no author is found, return person search or an empty author page @param req: Apache request object @type req: Apache request object @param form: URL parameters @type form: dict @param return_html: if False: write to req object consecutively else construct and return html code for the caches @type return_html: boolean ''' is_bibauthorid = False bibauthorid_template = None personid_status_cacher = None userinfo = collect_user_info(req) metaheaderadd = "" html = [] try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_names_from_id from invenio.bibauthorid_webapi import get_person_db_names_from_id from invenio.bibauthorid_webapi import get_person_redirect_link from invenio.bibauthorid_webapi import is_valid_canonical_id from invenio.bibauthorid_webapi import get_personid_status_cacher from invenio.bibauthorid_utils import create_normalized_name from invenio.bibauthorid_utils import split_name_parts # from invenio.bibauthorid_config import CLAIMPAPER_CLAIM_OTHERS_PAPERS from invenio.bibauthorid_config import AID_ENABLED from invenio.bibauthorid_config import AID_ON_AUTHORPAGES bibauthorid_template = invenio.template.load('bibauthorid') # from invenio.access_control_admin import acc_find_user_role_actions if not AID_ENABLED or not AID_ON_AUTHORPAGES: is_bibauthorid = False else: is_bibauthorid = True except (ImportError): is_bibauthorid = False from operator import itemgetter argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0), 'recid': (int, -1) }) ln = argd['ln'] verbose = argd['verbose'] req.argd = argd #needed since perform_req_search param_recid = argd['recid'] bibauthorid_data = {"is_baid": is_bibauthorid, "pid":-1, "cid": ""} pubs = [] authors = [] recid = None nquery = "" names_dict = {} db_names_dict = {} _ = gettext_set_language(ln) title_message = "Author Details" #let's see what takes time.. 
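# Illustrative sketch (not part of the original patch): resolve_personid()
# above accepts the /author/ page parameter in several shapes. Roughly
# (canonical ids such as Ellis_J_1, resolved via bibauthorid, omitted here):
def classify_pageparam(pageparam):
    try:
        return ('personid', int(pageparam))          # /author/152
    except (ValueError, TypeError):
        pass
    if ':' in pageparam:                             # /author/15:Doe%2C+John
        left, right = pageparam.split(':', 1)
        for rec, name in ((left, right), (right, left)):
            try:
                return ('recid_and_name', (int(rec), name))
            except (ValueError, TypeError):
                pass
    return ('name', pageparam)                       # /author/Doe%2C+John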
time1 = time.time() genstart = time1 time2 = time.time() if is_bibauthorid: metaheaderadd = bibauthorid_template.tmpl_meta_includes() if not return_html: # Start the page in clean manner: req.content_type = "text/html" req.send_http_header() req.write(pageheaderonly(req=req, title=title_message, metaheaderadd=metaheaderadd, language=ln)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) if is_bibauthorid: personid_status_cacher = get_personid_status_cacher() personid_status_cacher.recreate_cache_if_needed() self.person_data_available = personid_status_cacher.cache if not self.person_data_available: is_bibauthorid = False if is_bibauthorid: self.resolve_personid(param_recid) if self.person_search_results: if bibauthorid_template and nquery: authors = [] for results in self.person_search_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) authors.append([results[0], results[1], authorpapers[0:4]]) srch = bibauthorid_template.tmpl_author_search body = srch(nquery, authors, author_pages_mode=True) if return_html: html.append(body) return "\n".join(html) else: req.write(body) return # start page # req.content_type = "text/html" # req.send_http_header() # uid = getUid(req) # page_start(req, "hb", "", "", ln, uid) if self.personid < 0 and is_bibauthorid: # Well, no person. Fall back to the exact author name search then. ptitle = '' if recid: try: ptitle = get_record(recid)['245'][0][0][0][1] except (IndexError, TypeError, KeyError): ptitle = '"Title not available"' self.authorname = self.pageparam title = '' pmsg = '' if ptitle: pmsg = " on paper '%s'" % ptitle # We're sorry we're introducing html tags where they weren't before. XXX message = "" if CFG_INSPIRE_SITE: message += ("
<p>We are in the process of attributing papers to people so that we can "
                        "improve publication lists.</p>\n")
            message += ("<p>We have not generated the publication list for author '%s'%s. Please be patient as we "
                        "continue to match people to author names and publications. '%s' may be attributed in the next "
                        "few weeks.</p>" % (self.pageparam, pmsg, self.pageparam))

            if return_html:
                html.append('<h1>%s</h1>' % title)
                html.append('%s<br />' % message)
            else:
                req.write('<h1>%s</h1>' % title)
                req.write('%s<br />
      ' % message) if not nquery: nquery = self.pageparam if not authors: authors = [] sorted_results = search_person_ids_by_name(nquery) for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) authors.append([results[0], results[1], authorpapers[0:4]]) srch = bibauthorid_template.tmpl_author_search body = srch(nquery, authors, author_pages_mode=True) if return_html: html.append(body) return "\n".join(html) else: req.write(body) return # return self._psearch(req, form, is_fallback=True, fallback_query=self.pageparam, fallback_title=title, fallback_message=message) elif self.personid < 0 and not is_bibauthorid: if not self.pageparam: return websearch_templates.tmpl_author_information(req, {}, self.authorname, 0, {}, {}, {}, {}, {}, {}, None, bibauthorid_data, ln, return_html) self.authorname = self.pageparam #search the publications by this author pubs = perform_request_search(req=None, p=self.authorname, f="exactauthor") names_dict[self.authorname] = len(pubs) db_names_dict[self.authorname] = len(pubs) elif is_bibauthorid and self.personid > -1: #yay! Person found! find only papers not disapproved by humans if return_html: html.append("") else: req.write("") full_pubs = get_papers_by_person_id(self.personid, -1) pubs = [int(row[0]) for row in full_pubs] longest_name = "" try: self.personid = int(self.personid) except (TypeError, ValueError): raise ValueError("Personid must be a number!") for aname, acount in get_person_names_from_id(self.personid): names_dict[aname] = acount norm_name = create_normalized_name(split_name_parts(aname)) if len(norm_name) > len(longest_name): longest_name = norm_name for aname, acount in get_person_db_names_from_id(self.personid): aname = aname.replace('"', '').strip() db_names_dict[aname] = acount self.authorname = longest_name if not pubs and param_recid > -1: if return_html: html.append("
<p>")
                    html.append(_("We're sorry. The requested author \"%s\" seems not to be listed on the specified paper." % (self.pageparam,)))
                    html.append("<br />")
                    html.append(_("Please try the following link to start a broader search on the author: "))
                    html.append('<a href="%s/author/%s">%s</a>' % (CFG_SITE_URL, self.pageparam, self.pageparam))
                    html.append("</p>")
                    return "\n".join(html)
                else:
                    req.write("<p>")
                    req.write(_("We're sorry. The requested author \"%s\" seems not to be listed on the specified paper." % (self.pageparam,)))
                    req.write("<br />")
                    req.write(_("Please try the following link to start a broader search on the author: "))
                    req.write('<a href="%s/author/%s">%s</a>' % (CFG_SITE_URL, self.pageparam, self.pageparam))
                    req.write("</p>
      ") return page_end(req, 'hb', ln) #get most frequent authors of these pubs popular_author_tuples = get_most_popular_field_values(pubs, (AUTHOR_TAG, COAUTHOR_TAG)) coauthors = {} for (coauthor, frequency) in popular_author_tuples: if coauthor not in db_names_dict: coauthors[coauthor] = frequency if len(coauthors) > MAX_COLLAB_LIST: break time1 = time.time() if verbose == 9 and not return_html: req.write("
<br />popularized authors: " + str(time1 - time2) + "<br />")

        #and publication venues
        venuetuples = get_most_popular_field_values(pubs, (VENUE_TAG, ))
        time2 = time.time()
        if verbose == 9 and not return_html:
            req.write("
<br />venues: " + str(time2 - time1) + "<br />
      ") #and keywords kwtuples = get_most_popular_field_values(pubs, (KEYWORD_TAG, FKEYWORD_TAG), count_repetitive_values=False) if CFG_INSPIRE_SITE: # filter kw tuples against unwanted keywords: kwtuples_filtered = () for (kw, num) in kwtuples: kwlower = kw.lower() kwlower_unwanted = False for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_START: if kwlower.startswith(unwanted_keyword): kwlower_unwanted = True # unwanted keyword found break for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE: if unwanted_keyword in kwlower: kwlower_unwanted = True # unwanted keyword found break if not kwlower_unwanted: kwtuples_filtered += ((kw, num),) kwtuples = kwtuples_filtered time1 = time.time() if verbose == 9 and not return_html: req.write("
<br />keywords: " + str(time1 - time2) + "<br />
      ") #construct a simple list of tuples that contains keywords that appear #more than once moreover, limit the length of the list #to MAX_KEYWORD_LIST kwtuples = kwtuples[0:MAX_KEYWORD_LIST] vtuples = venuetuples[0:MAX_VENUE_LIST] time2 = time.time() if verbose == 9 and not return_html: req.write("
<br />misc: " + str(time2 - time1) + "<br />
      ") #a dict. keys: affiliations, values: lists of publications author_aff_pubs = self.get_institute_pub_dict(pubs, db_names_dict.keys()) time1 = time.time() if verbose == 9 and not return_html: req.write("
<br />affiliations: " + str(time1 - time2) + "<br />
      ") totaldownloads = 0 if CFG_BIBRANK_SHOW_DOWNLOAD_STATS: #find out how many times these records have been downloaded recsloads = {} recsloads = get_download_weight_total(recsloads, pubs) #sum up for k in recsloads.keys(): totaldownloads = totaldownloads + recsloads[k] #get cited by.. citedbylist = get_cited_by_list(pubs) person_link = None if (is_bibauthorid and self.personid >= 0 and "precached_viewclaimlink" in userinfo and "precached_usepaperattribution" in userinfo and "precached_usepaperclaim" in userinfo and (userinfo["precached_usepaperclaim"] or userinfo["precached_usepaperattribution"]) ): person_link = self.personid bibauthorid_data["pid"] = self.personid cid = get_person_redirect_link(self.personid) if is_valid_canonical_id(cid): person_link = cid bibauthorid_data["cid"] = cid time1 = time.time() if verbose == 9 and not return_html: req.write("
<br />citedby: " + str(time1 - time2) + "<br />
      ") #finally all stuff there, call the template if return_html: html.append(websearch_templates.tmpl_author_information(req, pubs, self.authorname, totaldownloads, author_aff_pubs, citedbylist, kwtuples, coauthors, vtuples, db_names_dict, person_link, bibauthorid_data, ln, return_html)) else: websearch_templates.tmpl_author_information(req, pubs, self.authorname, totaldownloads, author_aff_pubs, citedbylist, kwtuples, coauthors, vtuples, db_names_dict, person_link, bibauthorid_data, ln, return_html) time1 = time.time() #cited-by summary rec_query = "" extended_author_search_str = "" if bibauthorid_data['is_baid']: if bibauthorid_data["cid"]: rec_query = 'author:"%s"' % bibauthorid_data["cid"] elif bibauthorid_data["pid"] > -1: rec_query = 'author:"%s"' % bibauthorid_data["pid"] if not rec_query: rec_query = 'exactauthor:"' + self.authorname + '"' if is_bibauthorid: if len(db_names_dict.keys()) > 1: extended_author_search_str = '(' for name_index, name_query in enumerate(db_names_dict.keys()): if name_index > 0: extended_author_search_str += " OR " extended_author_search_str += 'exactauthor:"' + name_query + '"' if len(db_names_dict.keys()) > 1: extended_author_search_str += ')' if is_bibauthorid and extended_author_search_str: rec_query = extended_author_search_str if pubs: if return_html: html.append(summarize_records(intbitset(pubs), 'hcs', ln, rec_query)) else: req.write(summarize_records(intbitset(pubs), 'hcs', ln, rec_query, req=req)) time2 = time.time() if verbose == 9 and not return_html: req.write("
<br />summarizer: " + str(time2 - time1) + "<br />
      ") # simauthbox = create_similarly_named_authors_link_box(self.authorname) # req.write(simauthbox) if verbose == 9 and not return_html: req.write("
<br />all: " + str(time.time() - genstart) + "<br />
      ") if return_html: return "\n".join(html) else: return page_end(req, 'hb', ln) def _psearch(self, req, form, is_fallback=True, fallback_query='', fallback_title='', fallback_message=''): html = [] h = html.append if fallback_title: h('
<h1>%s</h1>' % fallback_title)
        if fallback_message:
            h('%s<br />' % fallback_message)
        h(' We may have \'%s\' partially matched; click <a href="author/%s">here</a> '
          % (fallback_query, fallback_query))
        h('to see what we have so far. (Note: this is likely to update frequently.)')
        return "\n".join(html)

    def get_institute_pub_dict(self, recids, names_list):
        """return a dictionary consisting of institute -> list of publications"""
        author_aff_pubs = {} #the dictionary to be built
        for recid in recids:
            #iterate all so that we get the first author's institute
            #if this is the first author OR
            #"his" institute if he is an affiliated author
            affus = [] #list of insts from the given record
            mainauthors = get_fieldvalues(recid, AUTHOR_TAG)
            mainauthor = " "
            if mainauthors:
                mainauthor = mainauthors[0]
            if (mainauthor in names_list):
                affus = get_fieldvalues(recid, AUTHOR_INST_TAG)
            else:
                #search for coauthors..
                coauthor_field_lines = []
                coauthorfield_content = get_fieldvalues_alephseq_like(recid, \
                                            COAUTHOR_TAG[:3])
                if coauthorfield_content:
                    coauthor_field_lines = coauthorfield_content.split("\n")
                for line in coauthor_field_lines:
                    for name_item in names_list:
                        breakit = False
                        if line.count(name_item) > 0:
                            #get affiliations .. the correct ones are $$ + code
                            code = COAUTHOR_INST_TAG[-1]
                            myparts = line.split("$$")
                            for part in myparts:
                                if part and part[0] == code:
                                    myaff = part[1:]
                                    affus.append(myaff)
                                    breakit = True
                        if breakit:
                            break
            #if this is empty, add a dummy " " value
            if (affus == []):
                affus = [" "]
            for a in affus:
                #add in author_aff_pubs
                if (author_aff_pubs.has_key(a)):
                    tmp = author_aff_pubs[a]
                    tmp.append(recid)
                    author_aff_pubs[a] = tmp
                else:
                    author_aff_pubs[a] = [recid]
        return author_aff_pubs

    index = __call__


class WebInterfaceRecordPages(WebInterfaceDirectory):
    """ Handling of a /CFG_SITE_RECORD/<recid> URL fragment """

    _exports = ['', 'files', 'reviews', 'comments', 'usage',
                'references', 'export', 'citations', 'holdings', 'edit',
                'keywords', 'multiedit', 'merge', 'plots']
    #_exports.extend(output_formats)

    def __init__(self, recid, tab, format=None):
        self.recid = recid
        self.tab = tab
        self.format = format

        self.files = WebInterfaceFilesPages(self.recid)
        self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1)
        self.comments = WebInterfaceCommentsPages(self.recid)
        self.usage = self
        self.references = self
        self.keywords = self
        self.holdings = WebInterfaceHoldingsPages(self.recid)
        self.citations = self
        self.plots = self
        self.export = WebInterfaceRecordExport(self.recid, self.format)
        self.edit = WebInterfaceEditPages(self.recid)
        self.merge = WebInterfaceMergePages(self.recid)
        return

    def __call__(self, req, form):
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        argd['tab'] = self.tab
        if self.format is not None:
            argd['of'] = self.format

        req.argd = argd

        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/ index = __call__ class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory): """ Handling of a /record-restricted/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS record_primary_collection = guess_primary_collection_of_a_record(self.recid) if collection_restricted_p(record_primary_collection): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') # Keep all the arguments, they might be reused in the # record page itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/ index = __call__ class WebInterfaceSearchResultsPages(WebInterfaceDirectory): """ Handling of the /search URL and its sub-pages. """ _exports = ['', 'authenticate', 'cache', 'log'] def __call__(self, req, form): """ Perform a search. 
""" argd = wash_search_urlargd(form) _ = gettext_set_language(argd['ln']) if req.method == 'POST': raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text=_("You are not authorized to view this area."), navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL == 2: ## Let's update the current collections list with all ## the restricted collections the user has rights to view. try: restricted_collections = user_info['precached_permitted_restricted_collections'] argd_collections = set(argd['c']) argd_collections.update(restricted_collections) argd['c'] = list(argd_collections) except KeyError: pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS involved_collections = set() involved_collections.update(argd['c']) involved_collections.add(argd['cc']) if argd['id'] > 0: argd['recid'] = argd['id'] if argd['idb'] > 0: argd['recidb'] = argd['idb'] if argd['sysno']: tmp_recid = find_record_from_sysno(argd['sysno']) if tmp_recid: argd['recid'] = tmp_recid if argd['sysnb']: tmp_recid = find_record_from_sysno(argd['sysnb']) if tmp_recid: argd['recidb'] = tmp_recid if argd['recid'] > 0: if argd['recidb'] > argd['recid']: # Hack to check if among the restricted collections # at least a record of the range is there and # then if the user is not authorized for that # collection. recids = intbitset(xrange(argd['recid'], argd['recidb'])) restricted_collection_cache.recreate_cache_if_needed() for collname in restricted_collection_cache.cache: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code and user_info['email'] == 'guest': coll_recids = get_collection(collname).reclist if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') else: involved_collections.add(guess_primary_collection_of_a_record(argd['recid'])) # If any of the collection requires authentication, redirect # to the authentication form. 
for coll in involved_collections: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): auth_code, auth_message = acc_authorize_action(req, 'runbibedit') if auth_code != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out def cache(self, req, form): """Search cache page.""" argd = wash_urlargd(form, {'action': (str, 'show')}) return perform_request_cache(req, action=argd['action']) def log(self, req, form): """Search log page.""" argd = wash_urlargd(form, {'date': (str, '')}) return perform_request_log(req, date=argd['date']) def authenticate(self, req, form): """Restricted search results pages.""" argd = wash_search_urlargd(form) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): auth_code, auth_message = acc_authorize_action(req, 'runbibedit') if auth_code != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd uid = getUid(req) if uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out index = __call__ class WebInterfaceLegacySearchPages(WebInterfaceDirectory): """ Handling of the /search.py URL and its sub-pages. """ _exports = ['', ('authenticate', 'index')] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) # We either jump into the generic search form, or the specific # /CFG_SITE_RECORD/... 
display if a recid is requested if argd['recid'] != -1: target = '/%s/%d' % (CFG_SITE_RECORD, argd['recid']) del argd['recid'] else: target = '/search' target += make_canonical_urlargd(argd, search_results_default_urlargd) return redirect_to_url(req, target, apache.HTTP_MOVED_PERMANENTLY) index = __call__ # Parameters for the legacy URLs, of the form /?c=ALEPH legacy_collection_default_urlargd = { 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0), 'c': (str, CFG_SITE_NAME)} class WebInterfaceSearchInterfacePages(WebInterfaceDirectory): """ Handling of collection navigation.""" _exports = [('index.py', 'legacy_collection'), ('', 'legacy_collection'), ('search.py', 'legacy_search'), 'search', 'openurl', 'opensearchdescription', 'logout_SSO_hook'] search = WebInterfaceSearchResultsPages() legacy_search = WebInterfaceLegacySearchPages() def logout_SSO_hook(self, req, form): """Script triggered by the display of the centralized SSO logout dialog. It logouts the user from Invenio and stream back the expected picture.""" logoutUser(req) req.content_type = 'image/gif' req.encoding = None req.filename = 'wsignout.gif' req.headers_out["Content-Disposition"] = "inline; filename=wsignout.gif" req.set_content_length(os.path.getsize('%s/img/wsignout.gif' % CFG_WEBDIR)) req.send_http_header() req.sendfile('%s/img/wsignout.gif' % CFG_WEBDIR) def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for collections and records)""" if component == 'collection': c = '/'.join(path) def answer(req, form): """Accessing collections cached pages.""" # Accessing collections: this is for accessing the # cached page on top of each collection. argd = wash_urlargd(form, search_interface_default_urlargd) # We simply return the cached page of the collection argd['c'] = c if not argd['c']: # collection argument not present; display # home collection by default argd['c'] = CFG_SITE_NAME # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] return display_collection(req, **argd) return answer, [] elif component == CFG_SITE_RECORD and path and path[0] == 'merge': return WebInterfaceMergePages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'edit': return WebInterfaceEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit': return WebInterfaceMultiEditPages(), path[1:] elif component == CFG_SITE_RECORD or component == 'record-restricted': try: if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: # let us try to recognize // style of URLs: x = get_mysql_recid_from_aleph_sysno(path[0]) if x: recid = x else: recid = int(path[0]) else: recid = int(path[0]) except IndexError: # display record #1 for URL /CFG_SITE_RECORD without a number recid = 1 except ValueError: if path[0] == '': # display record #1 for URL /CFG_SITE_RECORD/ without a number recid = 1 else: # display page not found for URLs like /CFG_SITE_RECORD/foo return None, [] if recid <= 0: # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 return None, [] format = None tab = '' try: if path[1] in ['', 'files', 'reviews', 'comments', 'usage', 'references', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots']: tab = path[1] elif path[1] == 'export': tab = '' format = path[2] # format = None # elif path[1] in output_formats: # tab = '' # format = path[1] else: # display page not found for URLs like /CFG_SITE_RECORD/references # for a 
collection where 'references' tabs is not visible return None, [] except IndexError: # Keep normal url if tabs is not specified pass #if component == 'record-restricted': #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:] #else: return WebInterfaceRecordPages(recid, tab, format), path[1:] return None, [] def openurl(self, req, form): """ OpenURL Handler.""" argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args) ret_url = websearch_templates.tmpl_openurl2invenio(argd) if ret_url: return redirect_to_url(req, ret_url) else: return redirect_to_url(req, CFG_SITE_URL) def opensearchdescription(self, req, form): """OpenSearch description file""" req.content_type = "application/opensearchdescription+xml" req.send_http_header() argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0) }) return websearch_templates.tmpl_opensearch_description(ln=argd['ln']) def legacy_collection(self, req, form): """Collection URL backward compatibility handling.""" accepted_args = dict(legacy_collection_default_urlargd) argd = wash_urlargd(form, accepted_args) # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # If we specify no collection, then we don't need to redirect # the user, so that accessing returns the # default collection. if not form.has_key('c'): return display_collection(req, **argd) # make the collection an element of the path, and keep the # other query elements as is. If the collection is CFG_SITE_NAME, # however, redirect to the main URL. c = argd['c'] del argd['c'] if c == CFG_SITE_NAME: target = '/' else: target = '/collection/' + quote(c) # Treat `as' argument specially: # We are going to redirect, so replace `aas' by `as' visible argument: if argd.has_key('aas'): argd['as'] = argd['aas'] del argd['aas'] target += make_canonical_urlargd(argd, legacy_collection_default_urlargd) return redirect_to_url(req, target) def display_collection(req, c, aas, verbose, ln): """Display search interface page for collection c by looking in the collection cache.""" _ = gettext_set_language(ln) req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln}, search_interface_default_urlargd) # get user ID: try: uid = getUid(req) user_preferences = {} if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this collection", navmenuid='search') elif uid > 0: user_preferences = get_user_preferences(uid) except Error: register_exception(req=req, alert_admin=True) return page(title=_("Internal Error"), body=create_error_box(req, verbose=verbose, ln=ln), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % CFG_SITE_NAME, language=ln, req=req, navmenuid='search') # start display: req.content_type = "text/html" req.send_http_header() # deduce collection id: colID = get_colID(get_coll_normalised_name(c)) if type(colID) is not int: page_body = '
<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>'
        page_body += '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>
      ' if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND return page(title=_("Collection %s Not Found") % cgi.escape(c), body=page_body, description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))), keywords="%s" % CFG_SITE_NAME, uid=uid, language=ln, req=req, navmenuid='search') # wash `aas' argument: if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln)): # nonexistent `aas' asked for, fall back to Simple Search: aas = 0 # display collection interface page: try: filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_navtrail = filedesc.read() filedesc.close() except: c_navtrail = "" try: filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_body = filedesc.read() filedesc.close() except: c_body = "" try: filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_tp = filedesc.read() filedesc.close() except: c_portalbox_tp = "" try: filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_te = filedesc.read() filedesc.close() except: c_portalbox_te = "" try: filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_lt = filedesc.read() filedesc.close() except: c_portalbox_lt = "" try: # show help boxes (usually located in "tr", "top right") # if users have not banned them in their preferences: c_portalbox_rt = "" if user_preferences.get('websearch_helpbox', 1) > 0: filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_rt = filedesc.read() filedesc.close() except: c_portalbox_rt = "" try: filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_last_updated = filedesc.read() filedesc.close() except: c_last_updated = "" try: title = get_coll_i18nname(c, ln) except: title = "" show_title_p = True body_css_classes = [] if c == CFG_SITE_NAME: # Do not display title on home collection show_title_p = False body_css_classes.append('home') if len(collection_reclist_cache.cache.keys()) == 1: # if there is only one collection defined, do not print its # title on the page as it would be displayed repetitively. show_title_p = False if aas == -1: show_title_p = False if CFG_INSPIRE_SITE == 1: # INSPIRE should never show title, but instead use css to # style collections show_title_p = False body_css_classes.append(nmtoken_from_string(c)) # RSS: rssurl = CFG_SITE_URL + '/rss' rssurl_params = [] if c != CFG_SITE_NAME: rssurl_params.append('cc=' + quote(c)) if ln != CFG_SITE_LANG and \ c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS: rssurl_params.append('ln=' + ln) if rssurl_params: rssurl += '?' 
+ '&'.join(rssurl_params) if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: metaheaderadd = get_mathjax_header() else: metaheaderadd = '' return page(title=title, body=c_body, navtrail=c_navtrail, description="%s - %s" % (CFG_SITE_NAME, c), keywords="%s, %s" % (CFG_SITE_NAME, c), metaheaderadd=metaheaderadd, uid=uid, language=ln, req=req, cdspageboxlefttopadd=c_portalbox_lt, cdspageboxrighttopadd=c_portalbox_rt, titleprologue=c_portalbox_tp, titleepilogue=c_portalbox_te, lastupdated=c_last_updated, navmenuid='search', rssurl=rssurl, body_css_classes=body_css_classes, show_title_p=show_title_p) class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory): """RSS 2.0 feed service pages.""" def __call__(self, req, form): """RSS 2.0 feed service.""" # Keep only interesting parameters for the search default_params = websearch_templates.rss_default_urlargd # We need to keep 'jrec' and 'rg' here in order to have # 'multi-page' RSS. These parameters are not kept be default # as we don't want to consider them when building RSS links # from search and browse pages. default_params.update({'jrec':(int, 1), 'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)}) argd = wash_urlargd(form, default_params) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # Create a standard filename with these parameters current_url = websearch_templates.build_rss_url(argd) cache_filename = current_url.split('/')[-1] # In the same way as previously, add 'jrec' & 'rg' req.content_type = "application/rss+xml" req.send_http_header() try: # Try to read from cache path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) # Check if cache needs refresh filedesc = open(path, "r") last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime) assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)) c_rss = filedesc.read() filedesc.close() req.write(c_rss) return except Exception, e: # do it live and cache previous_url = None if argd['jrec'] > 1: prev_jrec = argd['jrec'] - argd['rg'] if prev_jrec < 1: prev_jrec = 1 previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec) recIDs = perform_request_search(req, of="id", c=argd['c'], cc=argd['cc'], p=argd['p'], f=argd['f'], p1=argd['p1'], f1=argd['f1'], m1=argd['m1'], op1=argd['op1'], p2=argd['p2'], f2=argd['f2'], m2=argd['m2'], op2=argd['op2'], p3=argd['p3'], f3=argd['f3'], m3=argd['m3']) nb_found = len(recIDs) next_url = None if len(recIDs) >= argd['jrec'] + argd['rg']: next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg'])) first_url = websearch_templates.build_rss_url(argd, jrec=1) last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1) recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1] rss_prologue = '\n' + \ websearch_templates.tmpl_xml_rss_prologue(current_url=current_url, previous_url=previous_url, next_url=next_url, first_url=first_url, last_url=last_url, 
nb_found=nb_found, jrec=argd['jrec'], rg=argd['rg'], cc=argd['cc']) + '\n' req.write(rss_prologue) rss_body = format_records(recIDs, of='xr', ln=argd['ln'], user_info=user_info, record_separator="\n", req=req, epilogue="\n") rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n' req.write(rss_epilogue) # update cache dirname = "%s/rss" % (CFG_CACHEDIR) mymkdir(dirname) fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) try: # Remove the file just in case it already existed # so that a bit of space is created os.remove(fullfilename) except OSError: pass # Check if there's enough space to cache the request. if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS: try: os.umask(022) f = open(fullfilename, "w") f.write(rss_prologue + rss_body + rss_epilogue) f.close() except IOError, v: if v[0] == 36: # URL was too long. Never mind, don't cache pass else: raise repr(v) index = __call__ class WebInterfaceRecordExport(WebInterfaceDirectory): """ Handling of a ///export/ URL fragment """ _exports = output_formats def __init__(self, recid, format=None): self.recid = recid self.format = format for output_format in output_formats: self.__dict__[output_format] = self return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # Check if the record belongs to a restricted primary # collection. If yes, redirect to the authenticated URL. user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123/export/xm or /CFG_SITE_RECORD/123/export/xm/ index = __call__ diff --git a/modules/webstat/lib/webstat_engine.py b/modules/webstat/lib/webstat_engine.py index bbfbb8e1b..aac94d4c6 100644 --- a/modules/webstat/lib/webstat_engine.py +++ b/modules/webstat/lib/webstat_engine.py @@ -1,2421 +1,2421 @@ ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" __lastupdated__ = "$Date$" import calendar, commands, datetime, time, os, cPickle, random try: import xlwt xlwt_imported = True except ImportError: xlwt_imported = False from invenio.config import CFG_TMPDIR, CFG_SITE_URL, CFG_SITE_NAME, CFG_BINDIR from invenio.urlutils import redirect_to_url from invenio.search_engine import perform_request_search, \ get_collection_reclist, \ - get_fieldvalues, \ get_most_popular_field_values +from invenio.search_engine_utils import get_fieldvalues from invenio.dbquery import run_sql, \ wash_table_column_name from invenio.websubmitadmin_dblayer import get_docid_docname_alldoctypes from invenio.bibcirculation_utils import book_title_from_MARC, \ book_information_from_MARC from invenio.bibcirculation_dblayer import get_id_bibrec, \ get_borrower_data WEBSTAT_SESSION_LENGTH = 48 * 60 * 60 # seconds WEBSTAT_GRAPH_TOKENS = '-=#+@$%&XOSKEHBC' # KEY EVENT TREND SECTION def get_keyevent_trend_collection_population(args): """ Returns the quantity of documents in Invenio for the given timestamp range. @param args['collection']: A collection name @type args['collection']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ # collect action dates lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() if args.get('collection','All') == 'All': sql_query_g = ("SELECT creation_date FROM bibrec WHERE " + \ "creation_date > '%s' AND creation_date < '%s' " + \ "ORDER BY creation_date DESC") % \ (lower, upper) sql_query_i = "SELECT COUNT(id) FROM bibrec " + \ "WHERE creation_date < '%s'" % (lower) else: ids = perform_request_search(cc=args['collection']) if len(ids) == 0: return [] ids_str = str(ids).replace('[', '(').replace(']', ')') sql_query_g = ("SELECT creation_date FROM bibrec WHERE id IN %s AND " + \ "creation_date > '%s' AND creation_date < '%s' " + \ "ORDER BY creation_date DESC") % \ (ids_str, lower, upper) sql_query_i = "SELECT COUNT(id) FROM bibrec " + \ "WHERE id IN %s AND creation_date < '%s'" % (ids_str, lower) action_dates = [x[0] for x in run_sql(sql_query_g)] initial_quantity = run_sql(sql_query_i)[0][0] return _get_trend_from_actions(action_dates, initial_quantity, args['t_start'], args['t_end'], args['granularity'], args['t_format']) def get_keyevent_trend_search_frequency(args): """ Returns the number of searches (of any kind) carried out during the given timestamp range. 
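    Illustrative usage (a sketch only: it assumes the standard
    query/user_query tables are populated and that 'day' is one of the
    granularities understood by _get_trend_from_actions):

        >>> args = {'t_start': '2011-01-01 00:00:00',
        ...         't_end': '2011-02-01 00:00:00',
        ...         'granularity': 'day',
        ...         't_format': '%Y-%m-%d %H:%M:%S'}
        >>> trend = get_keyevent_trend_search_frequency(args)
        >>> # whatever _get_trend_from_actions builds: one (time bin, count)
        >>> # entry per granularity step between t_start and t_end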
@param args['t_start']: Date and time of start point
    @type args['t_start']: str
    @param args['t_end']: Date and time of end point
    @type args['t_end']: str
    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str
    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str
    """
    # collect action dates
    lower = _to_datetime(args['t_start'], args['t_format']).isoformat()
    upper = _to_datetime(args['t_end'], args['t_format']).isoformat()
    sql = "SELECT date FROM query INNER JOIN user_query ON id=id_query " + \
          "WHERE date > '%s' AND date < '%s' ORDER BY date DESC" % \
          (lower, upper)
    action_dates = [x[0] for x in run_sql(sql)]
    return _get_trend_from_actions(action_dates, 0, args['t_start'],
                                   args['t_end'], args['granularity'],
                                   args['t_format'])


def get_keyevent_trend_comments_frequency(args):
    """
    Returns the number of comments (of any kind) carried out
    during the given timestamp range.

    @param args['collection']: A collection name
    @type args['collection']: str
    @param args['t_start']: Date and time of start point
    @type args['t_start']: str
    @param args['t_end']: Date and time of end point
    @type args['t_end']: str
    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str
    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str
    """
    # collect action dates
    lower = _to_datetime(args['t_start'], args['t_format']).isoformat()
    upper = _to_datetime(args['t_end'], args['t_format']).isoformat()
    if args.get('collection', 'All') == 'All':
        sql = "SELECT date_creation FROM cmtRECORDCOMMENT " + \
              "WHERE date_creation > '%s' AND date_creation < '%s'" \
              % (lower, upper) + " ORDER BY date_creation DESC"
    else:
        ids = get_collection_reclist(args['collection']).tolist()
        if len(ids) == 0:
            return []
        ids_str = str(ids).replace('[', '(').replace(']', ')')
        sql = "SELECT date_creation FROM cmtRECORDCOMMENT \
               WHERE date_creation > '%s' AND date_creation < '%s' \
               AND id_bibrec IN %s ORDER BY date_creation DESC" \
               % (lower, upper, ids_str)
    action_dates = [x[0] for x in run_sql(sql)]
    return _get_trend_from_actions(action_dates, 0, args['t_start'],
                                   args['t_end'], args['granularity'],
                                   args['t_format'])


def get_keyevent_trend_search_type_distribution(args):
    """
    Returns the number of searches carried out during the given
    timestamp range, but also partitions them by type: Simple and
    Advanced.
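    Illustrative usage (a sketch only; argument conventions follow the other
    trend helpers in this module). Per the list comprehension below, each
    returned entry pairs a time bin with a (simple, advanced) count tuple:

        >>> args = {'t_start': '2011-01-01 00:00:00',
        ...         't_end': '2011-02-01 00:00:00',
        ...         'granularity': 'day',
        ...         't_format': '%Y-%m-%d %H:%M:%S'}
        >>> trend = get_keyevent_trend_search_type_distribution(args)
        >>> # trend[i] == (time_bin, (n_simple, n_advanced))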
@param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # SQL to determine all simple searches: sql = "SELECT date FROM query INNER JOIN user_query ON id=id_query " + \ "WHERE urlargs LIKE '%p=%' " + \ "AND date > '%s' AND date < '%s' ORDER BY date DESC" % (lower, upper) simple = [x[0] for x in run_sql(sql)] # SQL to determine all advanced searches: sql = "SELECT date FROM query INNER JOIN user_query ON id=id_query " + \ "WHERE urlargs LIKE '%as=1%' " + \ "AND date > '%s' AND date < '%s' ORDER BY date DESC" % (lower, upper) advanced = [x[0] for x in run_sql(sql)] # Compute the trend for both types s_trend = _get_trend_from_actions(simple, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) a_trend = _get_trend_from_actions(advanced, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) # Assemble, according to return type return [(s_trend[i][0], (s_trend[i][1], a_trend[i][1])) for i in range(len(s_trend))] def get_keyevent_trend_download_frequency(args): """ Returns the number of full text downloads carried out during the given timestamp range. @param args['collection']: A collection name @type args['collection']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # Collect list of timestamps of insertion in the specific collection if args.get('collection','All') == 'All': sql = "SELECT download_time FROM rnkDOWNLOADS WHERE download_time > '%s' \ AND download_time < '%s' ORDER BY download_time DESC" % (lower, upper) else: ids = get_collection_reclist(args['collection']).tolist() if len(ids) == 0: return [] ids_str = str(ids).replace('[', '(').replace(']', ')') sql = "SELECT download_time FROM rnkDOWNLOADS WHERE download_time > '%s' \ AND download_time < '%s' AND id_bibrec IN %s \ ORDER BY download_time DESC" % (lower, upper, ids_str) actions = [x[0] for x in run_sql(sql)] return _get_trend_from_actions(actions, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) def get_keyevent_trend_number_of_loans(args): """ Returns the number of loans carried out during the given timestamp range. 
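    Illustrative usage (a sketch only; it assumes crcLOAN rows exist in the
    requested interval):

        >>> args = {'t_start': '2011-01-01 00:00:00',
        ...         't_end': '2011-02-01 00:00:00',
        ...         'granularity': 'day',
        ...         't_format': '%Y-%m-%d %H:%M:%S'}
        >>> trend = get_keyevent_trend_number_of_loans(args)
        >>> # one (time bin, number of loans) entry per granularity step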
@param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ # collect action dates lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql = "SELECT loaned_on FROM crcLOAN " + \ "WHERE loaned_on > '%s' AND loaned_on < '%s' ORDER BY loaned_on DESC"\ % (lower, upper) action_dates = [x[0] for x in run_sql(sql)] return _get_trend_from_actions(action_dates, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) def get_keyevent_trend_web_submissions(args): """ Returns the quantity of websubmissions in Invenio for the given timestamp range. @param args['doctype']: A doctype name @type args['doctype']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ # collect action dates lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() if args['doctype'] == 'all': sql_query = "SELECT cd FROM sbmSUBMISSIONS " + \ "WHERE action='SBI' AND cd > '%s' AND cd < '%s'" % (lower, upper) + \ " AND status='finished' ORDER BY cd DESC" else: sql_query = "SELECT cd FROM sbmSUBMISSIONS " + \ "WHERE doctype='%s' AND action='SBI' " % args['doctype'] + \ "AND cd > '%s' AND cd < '%s' " % (lower, upper) + \ "AND status='finished' ORDER BY cd DESC" action_dates = [x[0] for x in run_sql(sql_query)] return _get_trend_from_actions(action_dates, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) def get_keyevent_loan_statistics(args): """ Data: - Number of documents (=records) loaned - Number of items loaned on the total number of items - Number of items never loaned on the total number of items - Average time between the date of the record creation and the date of the first loan Filter by - in a specified time span - by user address (=Department) - by UDC (see MARC field 080__a - list to be submitted) - by item status (available, missing) - by date of publication (MARC field 260__c) - by date of the record creation in the database @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['udc']: MARC field 080__a @type args['udc']: str @param args['item_status']: available, missing... 
@type args['item_status']: str @param args['publication_date']: MARC field 260__c @type args['publication_date']: str @param args['creation_date']: date of the record creation in the database @type args['creation_date']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcLOAN l " sql_where = "WHERE loaned_on > '%s' AND loaned_on < '%s' " % (lower, upper) if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += """AND l.id_crcBORROWER = bor.id AND bor.address LIKE '%%%s%%' """ % args['user_address'] if 'udc' in args and args['udc'] != '': sql_where += "AND l.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib08x brb, bib08x b \ WHERE brb.id_bibxxx = b.id AND tag='080__a' \ AND value LIKE '%%%s%%')" % args['udc'] if 'item_status' in args and args['item_status'] != '': sql_from += ", crcITEM i " sql_where += "AND l.barcode = i.barcode AND i.status = '%s' " % args['item_status'] if 'publication_date' in args and args['publication_date'] != '': sql_where += "AND l.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib26x brb, bib26x b \ WHERE brb.id_bibxxx = b.id AND tag='260__c' \ AND value LIKE '%%%s%%') " % args['publication_date'] if 'creation_date' in args and args['creation_date'] != '': sql_from += ", bibrec br " sql_where += """AND br.id=l.id_bibrec AND br.creation_date LIKE '%%%s%%' """ % args['creation_date'] # Number of loans: loans = run_sql("SELECT COUNT(DISTINCT l.id_bibrec) " + sql_from + sql_where)[0][0] # Number of items loaned on the total number of items: items_loaned = run_sql("SELECT COUNT(DISTINCT l.barcode) " + sql_from + sql_where)[0][0] total_items = run_sql("SELECT COUNT(*) FROM crcITEM")[0][0] loaned_on_total = float(items_loaned) / float(total_items) # Number of items never loaned on the total number of items never_loaned_on_total = float(total_items - items_loaned) / float(total_items) # Average time between the date of the record creation and the date of the first loan avg_sql = "SELECT DATEDIFF(MIN(loaned_on), MIN(br.creation_date)) " + sql_from if not ('creation_date' in args and args['creation_date'] != ''): avg_sql += ", bibrec br " avg_sql += sql_where if not ('creation_date' in args and args['creation_date'] != ''): avg_sql += "AND br.id=l.id_bibrec " avg_sql += "GROUP BY l.id_bibrec, br.id" res_avg = run_sql(avg_sql) if len(res_avg) > 0: avg = res_avg[0][0] else: avg = 0 return ((loans, ), (loaned_on_total, ), (never_loaned_on_total, ), (avg, )) def get_keyevent_loan_lists(args): """ Lists: - List of documents (= records) never loaned - List of most loaned documents (columns: number of loans, number of copies and the creation date of the record, in order to calculate the number of loans by copy), sorted by decreasing order (50 items) Filter by - in a specified time span - by UDC (see MARC field 080__a - list to be submitted) - by loan period (4 week loan, one week loan...) - by a certain number of loans - by date of publication (MARC field 260__c) - by date of the record creation in the database - by user address (=Department) @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['udc']: MARC field 080__a @type args['udc']: str @param args['loan_period']: 4 week loan, one week loan... 
@type args['loan_period']: str @param args['min_loan']: minimum number of loans @type args['min_loan']: int @param args['max_loan']: maximum number of loans @type args['max_loan']: int @param args['publication_date']: MARC field 260__c @type args['publication_date']: str @param args['creation_date']: date of the record creation in the database @type args['creation_date']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcLOAN l " sql_where = "WHERE type = 'normal' AND loaned_on > %s AND loaned_on < %s " param = [lower, upper] if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += "AND l.id_crcBORROWER = bor.id AND bor.address LIKE %s " param.append('%%%s%%' % args['user_address']) if 'udc' in args and args['udc'] != '': sql_where += "AND l.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib08x brb, bib08x b \ WHERE brb.id_bibxxx = b.id AND tag='080__a' \ AND value LIKE %s)" param.append('%%%s%%' % args['udc']) if 'loan_period' in args and args['loan_period'] != '': sql_from += ", crcITEM i " sql_where += "AND l.barcode = i.barcode AND i.loan_period = %s " param.append(args['loan_period']) if 'publication_date' in args and args['publication_date'] != '': sql_where += "AND l.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib26x brb, bib26x b \ WHERE brb.id_bibxxx = b.id AND tag='260__c' \ AND value LIKE %s) " param.append('%%%s%%' % args['publication_date']) if 'creation_date' in args and args['creation_date'] != '': sql_from += ", bibrec br " sql_where += "AND br.id=l.id_bibrec AND br.creation_date LIKE %s " param.append('%%%s%%' % args['creation_date']) param = tuple(param) res = [("", "Title", "Author", "Edition", "Number of loans", "Number of copies", "Date of creation of the record")] # Documents (= records) never loaned: for rec, copies in run_sql("""SELECT id_bibrec, COUNT(*) FROM crcITEM WHERE id_bibrec NOT IN (SELECT l.id_bibrec """ + sql_from + sql_where + ") GROUP BY id_bibrec", param): loans = run_sql("SELECT COUNT(*) %s %s AND l.id_bibrec=%s" % (sql_from, sql_where, rec), param)[0][0] try: creation = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (rec, ))[0][0] except: creation = datetime.datetime(1970, 01, 01) author = get_fieldvalues(rec, "100__a") if len(author) > 0: author = author[0] else: author = "" edition = get_fieldvalues(rec, "250__a") if len(edition) > 0: edition = edition[0] else: edition = "" res.append(('Documents never loaned', book_title_from_MARC(rec), author, edition, loans, copies, creation)) # Most loaned documents most_loaned = [] check_num_loans = "" if 'min_loans' in args and args['min_loans'] != '': check_num_loans += "COUNT(*) >= %s" % args['min_loans'] if 'max_loans' in args and args['max_loans'] != '' and args['max_loans'] != 0: if check_num_loans != "": check_num_loans += " AND " check_num_loans += "COUNT(*) <= %s" % args['max_loans'] if check_num_loans != "": check_num_loans = " HAVING " + check_num_loans mldocs = run_sql("SELECT l.id_bibrec, COUNT(*) " + sql_from + sql_where + " GROUP BY l.id_bibrec " + check_num_loans, param) for rec, loans in mldocs: copies = run_sql("SELECT COUNT(*) FROM crcITEM WHERE id_bibrec=%s", (rec, ))[0][0] most_loaned.append((rec, loans, copies, loans / copies)) if most_loaned == []: 
return (res) most_loaned.sort(key=lambda x: x[3], reverse=True) if len(most_loaned) > 50: most_loaned = most_loaned[:50] for rec, loans, copies, _ in most_loaned: author = get_fieldvalues(rec, "100__a") if len(author) > 0: author = author[0] else: author = "" edition = get_fieldvalues(rec, "250__a") if len(edition) > 0: edition = edition[0] else: edition = "" try: creation = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (rec, ))[0][0] except IndexError: creation = datetime.datetime(1970, 1, 1) res.append(('Most loaned documents', book_title_from_MARC(rec), author, edition, loans, copies, creation)) return (res) def get_keyevent_renewals_lists(args): """ Lists: - List of most renewed items, sorted by decreasing order (50 items) Filter by - in a specified time span - by UDC (see MARC field 080__a - list to be submitted) - by collection - by user address (=Department) @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['udc']: MARC field 080__a @type args['udc']: str @param args['collection']: collection of the record @type args['collection']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcLOAN l, crcITEM i " sql_where = "WHERE loaned_on > %s AND loaned_on < %s AND i.barcode = l.barcode " param = [lower, upper] if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += "AND l.id_crcBORROWER = bor.id AND bor.address LIKE %s " param.append('%%%s%%' % args['user_address']) if 'udc' in args and args['udc'] != '': sql_where += "AND l.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib08x brb, bib08x b \ WHERE brb.id_bibxxx = b.id AND tag='080__a' \ AND value LIKE %s)" param.append('%%%s%%' % args['udc']) filter_coll = False if 'collection' in args and args['collection'] != '': filter_coll = True recid_list = get_collection_reclist(args['collection']) param = tuple(param) # Results: res = [("Title", "Author", "Edition", "Number of renewals")] for rec, renewals in run_sql("SELECT i.id_bibrec, SUM(number_of_renewals) " + sql_from + sql_where + " GROUP BY i.id_bibrec ORDER BY SUM(number_of_renewals) DESC LIMIT 50", param): if filter_coll and rec not in recid_list: continue author = get_fieldvalues(rec, "100__a") if len(author) > 0: author = author[0] else: author = "" edition = get_fieldvalues(rec, "250__a") if len(edition) > 0: edition = edition[0] else: edition = "" res.append((book_title_from_MARC(rec), author, edition, int(renewals))) return (res) def get_keyevent_returns_table(args): """ Data: - Number of overdue returns in a year @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # Overdue returns: returns = run_sql("SELECT COUNT(*) FROM crcLOAN l \ WHERE loaned_on > %s AND loaned_on < %s AND \ due_date < NOW() AND (returned_on = '0000-00-00 00:00:00' \ OR returned_on > due_date)", (lower, upper))[0][0] return ((returns, ),
) def get_keyevent_trend_returns_percentage(args): """ Returns the number of overdue returns and the total number of returns @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # SQL to determine overdue returns: sql = "SELECT due_date FROM crcLOAN " + \ "WHERE loaned_on > %s AND loaned_on < %s AND " + \ "due_date < NOW() AND (returned_on = '0000-00-00 00:00:00' " + \ "OR returned_on > due_date) ORDER BY due_date DESC" overdue = [x[0] for x in run_sql(sql, (lower, upper))] # SQL to determine all returns: sql = "SELECT due_date FROM crcLOAN " + \ "WHERE loaned_on > %s AND loaned_on < %s AND " + \ "due_date < NOW() ORDER BY due_date DESC" total = [x[0] for x in run_sql(sql, (lower, upper))] # Compute the trend for both types s_trend = _get_trend_from_actions(overdue, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) a_trend = _get_trend_from_actions(total, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) # Assemble, according to return type return [(s_trend[i][0], (s_trend[i][1], a_trend[i][1])) for i in range(len(s_trend))] def get_keyevent_ill_requests_statistics(args): """ Data: - Number of ILL requests - Number of satisfied ILL requests 3 months after the date of request creation, over a period of one year - Percentage of satisfied ILL requests 3 months after the date of request creation, over a period of one year - Average time between the date and hour of the ILL request and the date and hour of delivery of the item to the user, over a period of one year (with flexibility in the choice of the dates) - Average time between the date and hour the ILL request was sent to the supplier and the date and hour of delivery of the item, over a period of one year (with flexibility in the choice of the dates) Filter by - in a specified time span - by type of document (book or article) - by user address - by status of the request (= new, sent, etc.) - by supplier @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['doctype']: type of document (book or article) @type args['doctype']: str @param args['status']: status of the request (= new, sent, etc.)
@type args['status']: str @param args['supplier']: supplier @type args['supplier']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcILLREQUEST ill " sql_where = "WHERE period_of_interest_from > %s AND period_of_interest_from < %s " param = [lower, upper] if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += "AND ill.id_crcBORROWER = bor.id AND bor.address LIKE %s " param.append('%%%s%%' % args['user_address']) if 'doctype' in args and args['doctype'] != '': sql_where += "AND ill.request_type=%s" param.append(args['doctype']) if 'status' in args and args['status'] != '': sql_where += "AND ill.status = %s " param.append(args['status']) if 'supplier' in args and args['supplier'] != '': sql_from += ", crcLIBRARY lib " sql_where += "AND lib.id=ill.id_crcLIBRARY AND lib.name=%s " param.append(args['supplier']) param = tuple(param) # Number of requests: requests = run_sql("SELECT COUNT(*) " + sql_from + sql_where, param)[0][0] # Number of satisfied ILL requests 3 months after the date of request creation: satrequests = run_sql("SELECT COUNT(*) " + sql_from + sql_where + "AND arrival_date != '0000-00-00 00:00:00' AND \ DATEDIFF(arrival_date, period_of_interest_from) < 90 ", param)[0][0] # Average time between the date and the hour of the ill request date and # the date and the hour of the delivery item to the user avgdel = run_sql("SELECT AVG(TIMESTAMPDIFF(HOUR, period_of_interest_from, request_date)) " + sql_from + sql_where, param)[0][0] if avgdel is not None: avgdel = int(avgdel) else: avgdel = 0 # Average time between the date and the hour the ILL request was sent to # the supplier and the date and hour of the delivery item avgsup = run_sql("SELECT AVG(TIMESTAMPDIFF(HOUR, arrival_date, request_date)) " + sql_from + sql_where, param)[0][0] if avgsup is not None: avgsup = int(avgsup) else: avgsup = 0 return ((requests, ), (satrequests, ), (avgdel, ), (avgsup, )) def get_keyevent_ill_requests_lists(args): """ Lists: - List of ILL requests Filter by - in a specified time span - by type of request (article or book) - by supplier @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['doctype']: type of request (article or book) @type args['doctype']: str @param args['supplier']: supplier @type args['supplier']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcILLREQUEST ill " sql_where = "WHERE request_date > %s AND request_date < %s " param = [lower, upper] if 'doctype' in args and args['doctype'] != '': sql_where += "AND ill.request_type=%s" param.append(args['doctype']) if 'supplier' in args and args['supplier'] != '': sql_from += ", crcLIBRARY lib " sql_where += "AND lib.id=ill.id_crcLIBRARY AND lib.name=%s " param.append(args['supplier']) # Results: res = [("Title", "Author", "Edition")] for item_info in run_sql("SELECT item_info " + sql_from + sql_where + " LIMIT 100", param): item_info = eval(item_info[0]) try: res.append((item_info['title'], item_info['authors'],
item_info['edition'])) except KeyError: pass return (res) def get_keyevent_trend_satisfied_ill_requests_percentage(args): """ Returns the number of satisfied ILL requests 3 months after the date of request creation and the total number of ILL requests @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['doctype']: type of document (book or article) @type args['doctype']: str @param args['status']: status of the request (= new, sent, etc.) @type args['status']: str @param args['supplier']: supplier @type args['supplier']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcILLREQUEST ill " sql_where = "WHERE request_date > %s AND request_date < %s " param = [lower, upper] if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += "AND ill.id_crcBORROWER = bor.id AND bor.address LIKE %s " param.append('%%%s%%' % args['user_address']) if 'doctype' in args and args['doctype'] != '': sql_where += "AND ill.request_type=%s" param.append(args['doctype']) if 'status' in args and args['status'] != '': sql_where += "AND ill.status = %s " param.append(args['status']) if 'supplier' in args and args['supplier'] != '': sql_from += ", crcLIBRARY lib " sql_where += "AND lib.id=ill.id_crcLIBRARY AND lib.name=%s " param.append(args['supplier']) # SQL to determine satisfied ILL requests: sql = "SELECT request_date " + sql_from + sql_where + \ "AND ADDDATE(request_date, 90) < NOW() AND (arrival_date != '0000-00-00 00:00:00' " + \ "AND arrival_date < ADDDATE(request_date, 90)) ORDER BY request_date DESC" satisfied = [x[0] for x in run_sql(sql, param)] # SQL to determine all ILL requests: sql = "SELECT request_date " + sql_from + sql_where + \ " AND ADDDATE(request_date, 90) < NOW() ORDER BY request_date DESC" total = [x[0] for x in run_sql(sql, param)] # Compute the trend for both types s_trend = _get_trend_from_actions(satisfied, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) a_trend = _get_trend_from_actions(total, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) # Assemble, according to return type return [(s_trend[i][0], (s_trend[i][1], a_trend[i][1])) for i in range(len(s_trend))] def get_keyevent_items_statistics(args): """ Data: - The total number of items - Total number of new items added in last year Filter by - in a specified time span - by collection - by UDC (see MARC field 080__a - list to be submitted) @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['udc']: MARC field 080__a @type args['udc']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcITEM i " sql_where = "WHERE " param = [] if 'udc' in args and args['udc'] != '': sql_where += "i.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib08x brb, bib08x b \ WHERE brb.id_bibxxx
= b.id AND tag='080__a' \ AND value LIKE %s)" param.append('%%%s%%' % args['udc']) # Number of items: if sql_where == "WHERE ": sql_where = "" items = run_sql("SELECT COUNT(i.id_bibrec) " + sql_from + sql_where, param)[0][0] # Number of new items: param += [lower, upper] if sql_where == "": sql_where = "WHERE creation_date > %s AND creation_date < %s " else: sql_where += " AND creation_date > %s AND creation_date < %s " new_items = run_sql("SELECT COUNT(i.id_bibrec) " + sql_from + sql_where, param)[0][0] return ((items, ), (new_items, )) def get_keyevent_items_lists(args): """ Lists: - The list of items Filter by - by library (=physical location of the item) - by status (=on loan, available, requested, missing...) @param args['library']: physical location of the item @type args['library']: str @param args['status']: on loan, available, requested, missing... @type args['status']: str """ sql_from = "FROM crcITEM i " sql_where = "WHERE " param = [] if 'library' in args and args['library'] != '': sql_from += ", crcLIBRARY li " sql_where += "li.id=i.id_crcLIBRARY AND li.name=%s " param.append(args['library']) if 'status' in args and args['status'] != '': if sql_where != "WHERE ": sql_where += "AND " sql_where += "i.status = %s " param.append(args['status']) # Results: res = [("Title", "Author", "Edition", "Barcode", "Publication date")] if sql_where == "WHERE ": sql_where = "" if len(param) == 0: sqlres = run_sql("SELECT i.barcode, i.id_bibrec " + sql_from + sql_where + " LIMIT 100") else: sqlres = run_sql("SELECT i.barcode, i.id_bibrec " + sql_from + sql_where + " LIMIT 100", tuple(param)) for barcode, rec in sqlres: author = get_fieldvalues(rec, "100__a") if len(author) > 0: author = author[0] else: author = "" edition = get_fieldvalues(rec, "250__a") if len(edition) > 0: edition = edition[0] else: edition = "" res.append((book_title_from_MARC(rec), author, edition, barcode, book_information_from_MARC(int(rec))[1])) return (res) def get_keyevent_loan_request_statistics(args): """ Data: - Number of hold requests, one week after the date of request creation - Number of successful hold request transactions - Average time between the hold request date and the date of delivery of the document in a year Filter by - in a specified time span - by item status (available, missing) @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['item_status']: available, missing...
@type args['item_status']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcLOANREQUEST lr " sql_where = "WHERE request_date > %s AND request_date < %s " param = [lower, upper] if 'item_status' in args and args['item_status'] != '': sql_from += ", crcITEM i " sql_where += "AND lr.barcode = i.barcode AND i.status = %s " param.append(args['item_status']) custom_table = get_customevent_table("loanrequest") # Number of hold requests, one week after the date of request creation: holds = run_sql("""SELECT COUNT(*) %s, %s ws %s AND ws.request_id=lr.id AND DATEDIFF(ws.creation_time, lr.request_date) >= 7""" % (sql_from, custom_table, sql_where), param)[0][0] # Number of successful hold request transactions successful_holds = run_sql("SELECT COUNT(*) %s %s AND lr.status='done'" % (sql_from, sql_where), param)[0][0] # Average time between the hold request date and the date of delivery of the document in a year avg = run_sql("""SELECT AVG(DATEDIFF(ws.creation_time, lr.request_date)) %s, %s ws %s AND ws.request_id=lr.id""" % (sql_from, custom_table, sql_where), param)[0][0] if avg is not None: avg = int(avg) else: avg = 0 return ((holds, ), (successful_holds, ), (avg, )) def get_keyevent_loan_request_lists(args): """ Lists: - List of the most requested items Filter by - in a specified time span - by UDC (see MARC field 080__a - list to be submitted) - by user address (=Department) @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['udc']: MARC field 080__a @type args['udc']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from = "FROM crcLOANREQUEST lr " sql_where = "WHERE request_date > %s AND request_date < %s " param = [lower, upper] if 'user_address' in args and args['user_address'] != '': sql_from += ", crcBORROWER bor " sql_where += "AND lr.id_crcBORROWER = bor.id AND bor.address LIKE %s " param.append('%%%s%%' % args['user_address']) if 'udc' in args and args['udc'] != '': sql_where += "AND lr.id_bibrec IN ( SELECT brb.id_bibrec \ FROM bibrec_bib08x brb, bib08x b \ WHERE brb.id_bibxxx = b.id AND tag='080__a' \ AND value LIKE %s)" param.append('%%%s%%' % args['udc']) res = [("Title", "Author", "Edition", "Barcode")] # Most requested items: for barcode in run_sql("SELECT lr.barcode " + sql_from + sql_where + " GROUP BY barcode ORDER BY COUNT(*) DESC", param): rec = get_id_bibrec(barcode[0]) author = get_fieldvalues(rec, "100__a") if len(author) > 0: author = author[0] else: author = "" edition = get_fieldvalues(rec, "250__a") if len(edition) > 0: edition = edition[0] else: edition = "" res.append((book_title_from_MARC(rec), author, edition, barcode[0])) return (res) def get_keyevent_user_statistics(args): """ Data: - Total number of active users (to be defined = at least one transaction in the past year) Filter by - in a specified time span - by user address - by registration date @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param
args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from_ill = "FROM crcILLREQUEST ill " sql_from_loan = "FROM crcLOAN l " sql_where_ill = "WHERE request_date > %s AND request_date < %s " sql_where_loan = "WHERE loaned_on > %s AND loaned_on < %s " sql_address = "" param = [lower, upper, lower, upper] if 'user_address' in args and args['user_address'] != '': sql_address += ", crcBORROWER bor WHERE id = user AND \ address LIKE %s " param.append('%%%s%%' % args['user_address']) # Total number of active users: users = run_sql("""SELECT COUNT(DISTINCT user) FROM ((SELECT id_crcBORROWER user %s %s) UNION (SELECT id_crcBORROWER user %s %s)) res %s""" % (sql_from_ill, sql_where_ill, sql_from_loan, sql_where_loan, sql_address), param)[0][0] return ((users, ), ) def get_keyevent_user_lists(args): """ Lists: - List of most intensive users (ILL requests + Loan) Filter by - in a specified time span - by user address - by registration date @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['user_address']: borrower address @type args['user_address']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str """ lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() sql_from_ill = "FROM crcILLREQUEST ill " sql_from_loan = "FROM crcLOAN l " sql_where_ill = "WHERE request_date > %s AND request_date < %s " sql_where_loan = "WHERE loaned_on > %s AND loaned_on < %s " sql_address = "" param = [lower, upper, lower, upper] if 'user_address' in args and args['user_address'] != '': sql_address += ", crcBORROWER bor WHERE id = user AND \ address LIKE %s " param.append('%%%s%%' % args['user_address']) res = [("Name", "Address", "Mailbox", "E-mail", "Number of transactions")] # List of most intensive users (ILL requests + Loan): for borrower_id, trans in run_sql("SELECT user, SUM(trans) FROM \ ((SELECT id_crcBORROWER user, COUNT(*) trans %s %s GROUP BY id_crcBORROWER) UNION \ (SELECT id_crcBORROWER user, COUNT(*) trans %s %s GROUP BY id_crcBORROWER)) res %s \ GROUP BY user ORDER BY SUM(trans) DESC" % (sql_from_ill, sql_where_ill, sql_from_loan, sql_where_loan, sql_address), param): name, address, mailbox, email = get_borrower_data(borrower_id) res.append((name, address, mailbox, email, int(trans))) return (res) # KEY EVENT SNAPSHOT SECTION def get_keyevent_snapshot_uptime_cmd(): """ A specific implementation of get_current_event(). @return: The std-out from the UNIX command 'uptime'. @type: str """ return _run_cmd('uptime').strip().replace('  ', ' ') def get_keyevent_snapshot_apache_processes(): """ A specific implementation of get_current_event(). @return: The number of Apache processes (root and children). @type: str """ # The number of Apache processes (root+children) return _run_cmd('ps -e | grep apache2 | grep -v grep | wc -l') def get_keyevent_snapshot_bibsched_status(): """ A specific implementation of get_current_event(). @return: Information about the number of tasks in the different status modes.
@type: [(str, int)] """ sql = "SELECT status, COUNT(status) FROM schTASK GROUP BY status" return [(x[0], int(x[1])) for x in run_sql(sql)] def get_keyevent_snapshot_sessions(): """ A specific implementation of get_current_event(). @return: The current number of website visitors (guests, logged in) @type: (int, int) """ # SQL to retrieve sessions of guests sql = "SELECT COUNT(session_expiry) " + \ "FROM session INNER JOIN user ON uid=id " + \ "WHERE email = '' AND " + \ "session_expiry-%d < unix_timestamp() AND " \ % WEBSTAT_SESSION_LENGTH + \ "unix_timestamp() < session_expiry" guests = run_sql(sql)[0][0] # SQL to retrieve sessions of logged-in users sql = "SELECT COUNT(session_expiry) " + \ "FROM session INNER JOIN user ON uid=id " + \ "WHERE email <> '' AND " + \ "session_expiry-%d < unix_timestamp() AND " \ % WEBSTAT_SESSION_LENGTH + \ "unix_timestamp() < session_expiry" logged_ins = run_sql(sql)[0][0] # Assemble, according to return type return (guests, logged_ins) def get_keyevent_bibcirculation_report(freq='yearly'): """ Monthly and yearly report with the total number of circulation transactions (loans, renewals, returns, ILL requests, hold requests). @param freq: yearly or monthly @type freq: str @return: loans, renewals, returns, ILL requests, hold requests @type: (int, int, int, int, int) """ if freq == 'monthly': datefrom = datetime.date.today().strftime("%Y-%m-01 00:00:00") else: #yearly datefrom = datetime.date.today().strftime("%Y-01-01 00:00:00") loans, renewals, returns = run_sql("""SELECT COUNT(*), SUM(number_of_renewals), SUM(returned_on<>'0000-00-00') FROM crcLOAN WHERE loaned_on > %s""", (datefrom, ))[0] illrequests = run_sql("SELECT COUNT(*) FROM crcILLREQUEST WHERE request_date > %s", (datefrom, ))[0][0] holdrequest = run_sql("SELECT COUNT(*) FROM crcLOANREQUEST WHERE request_date > %s", (datefrom, ))[0][0] return (loans, renewals, returns, illrequests, holdrequest) # ERROR LOG STATS def update_error_log_analyzer(): """Creates split files for today's errors""" _run_cmd('bash %s/webstat -e -is' % CFG_BINDIR) def get_invenio_error_log_ranking(): """ Returns the ranking of the errors in the invenio log""" return _run_cmd('bash %s/webstat -e -ir' % CFG_BINDIR) def get_invenio_last_n_errors(nerr): """Returns the last nerr errors in the invenio log (without details)""" return _run_cmd('bash %s/webstat -e -il %d' % (CFG_BINDIR, nerr)) def get_invenio_error_details(error): """Returns the complete text of the invenio error.""" out = _run_cmd('bash %s/webstat -e -id %s' % (CFG_BINDIR, error)) return out def get_apache_error_log_ranking(): """ Returns the ranking of the errors in the apache log""" return _run_cmd('bash %s/webstat -e -ar' % CFG_BINDIR) # CUSTOM EVENT SECTION def get_customevent_trend(args): """ Returns trend data for a custom event over a given timestamp range.
@param args['event_id']: The event id @type args['event_id']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str @param args['cols']: Columns and their content to filter on; if a column is absent or its content is empty, all columns are included @type args['cols']: [ [ str, str ], ] """ # Get a MySQL friendly date lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() tbl_name = get_customevent_table(args['event_id']) col_names = get_customevent_args(args['event_id']) sql_query = ["SELECT creation_time FROM %s WHERE creation_time > '%s'" % (tbl_name, lower)] sql_query.append("AND creation_time < '%s'" % upper) sql_param = [] for col_bool, col_title, col_content in args['cols']: if not col_title in col_names: continue if col_content: if col_bool == "and" or col_bool == "": sql_query.append("AND %s" % wash_table_column_name(col_title)) elif col_bool == "or": sql_query.append("OR %s" % wash_table_column_name(col_title)) elif col_bool == "and_not": sql_query.append("AND NOT %s" % wash_table_column_name(col_title)) else: continue sql_query.append(" LIKE %s") sql_param.append("%" + col_content + "%") sql_query.append("ORDER BY creation_time DESC") sql = ' '.join(sql_query) dates = [x[0] for x in run_sql(sql, tuple(sql_param))] return _get_trend_from_actions(dates, 0, args['t_start'], args['t_end'], args['granularity'], args['t_format']) def get_customevent_dump(args): """ Similar to a get_event_trend implementation, but no refining aka frequency handling is carried out whatsoever. This is just a dump. A dump! @param args['event_id']: The event id @type args['event_id']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str @param args['cols']: Columns and their content to filter on; if a column is absent or its content is empty, all columns are included @type args['cols']: [ [ str, str ], ] """ # Get a MySQL friendly date lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # Get customevents # events_list = [(creation_time, event, [arg1, arg2, ...]), ...]
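# The rows of all the selected events are merged into one list below and sorted by creation time before being output.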
event_list = [] event_cols = {} for event_id, i in [(args['ids'][i], str(i)) for i in range(len(args['ids']))]: # Get all the event arguments and creation times tbl_name = get_customevent_table(event_id) col_names = get_customevent_args(event_id) sql_query = ["SELECT * FROM %s WHERE creation_time > '%s'" % (tbl_name, lower)] # Note: SELECT * technique is okay here sql_query.append("AND creation_time < '%s'" % upper) sql_param = [] for col_bool, col_title, col_content in args['cols' + i]: if not col_title in col_names: continue if col_content: if col_bool == "and" or col_bool == "": sql_query.append("AND %s" % \ wash_table_column_name(col_title)) elif col_bool == "or": sql_query.append("OR %s" % \ wash_table_column_name(col_title)) elif col_bool == "and_not": sql_query.append("AND NOT %s" % \ wash_table_column_name(col_title)) else: continue sql_query.append(" LIKE %s") sql_param.append("%" + col_content + "%") sql_query.append("ORDER BY creation_time DESC") sql = ' '.join(sql_query) res = run_sql(sql, tuple(sql_param)) for row in res: event_list.append((row[1], event_id, row[2:])) # Get the event col names try: event_cols[event_id] = cPickle.loads(run_sql( "SELECT cols FROM staEVENT WHERE id = %s", (event_id, ))[0][0]) except TypeError: event_cols[event_id] = ["Unnamed"] event_list.sort() output = [] for row in event_list: temp = [row[1], row[0].strftime('%Y-%m-%d %H:%M:%S')] arguments = ["%s: %s" % (event_cols[row[1]][i], row[2][i]) for i in range(len(row[2]))] temp.extend(arguments) output.append(tuple(temp)) return output def get_customevent_table(event_id): """ Helper function that, for a certain event id, retrieves the corresponding event table name. """ res = run_sql( "SELECT CONCAT('staEVENT', number) FROM staEVENT WHERE id = %s", (event_id, )) try: return res[0][0] except IndexError: # No such event table return None def get_customevent_args(event_id): """ Helper function that, for a certain event id, retrieves the corresponding event argument (column) names.
""" res = run_sql("SELECT cols FROM staEVENT WHERE id = %s", (event_id, )) try: if res[0][0]: return cPickle.loads(res[0][0]) else: return [] except IndexError: # No such event table return None # CUSTOM SUMMARY SECTION def get_custom_summary_data(query, tag): """Returns the annual report data for the specified year @param year: Year of publication on the journal @type year: int @param query: Search query to make customized report @type query: str @param tag: MARC tag for the output @type tag: str """ # Check arguments if tag == '': tag = "909C4p" # First get records of the year recids = perform_request_search(p=query, of="id") # Then return list by tag pub = list(get_most_popular_field_values(recids, tag)) sel = 0 for elem in pub: sel += elem[1] if len(pub) == 0: return [] if len(recids) - sel != 0: pub.append(('Others', len(recids) - sel)) pub.append(('TOTAL', len(recids))) return pub def create_custom_summary_graph(data, path, title): """ Creates a pie chart with the information from the custom summary and saves it in the file specified by the path argument """ # If no input, we don't bother about anything if len(data) == 0: return os.environ['HOME'] = CFG_TMPDIR try: import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt except ImportError: return # make a square figure and axes matplotlib.rcParams['font.size'] = 8 labels = [x[0] for x in data] numb_elem = float(len(labels)) width = 6 + numb_elem / 7 gfile = plt.figure(1, figsize=(width, 6)) plt.axes([0.1, 0.1, 4.2 / width, 0.7]) numb = [x[1] for x in data] total = sum(numb) fracs = [x * 100 / total for x in numb] colors = [] random.seed() for i in range(numb_elem): col = 0.5 + float(i) / (numb_elem * 2.0) rand = random.random() / 2.0 if i % 3 == 0: red = col green = col + rand blue = col - rand if green > 1.0: green = 1 elif i % 3 == 1: red = col - rand green = col blue = col + rand if blue > 1.0: blue = 1 elif i % 3 == 2: red = col + rand green = col - rand blue = col if red > 1.0: red = 1 colors.append((red, green, blue)) patches = plt.pie(fracs, colors=tuple(colors), labels=labels, autopct='%1i%%', pctdistance=0.8, shadow=True)[0] ttext = plt.title(title) plt.setp(ttext, size='xx-large', color='b', family='monospace', weight='extra bold') legend_keywords = {"prop": {"size": "small"}} plt.figlegend(patches, labels, 'lower right', **legend_keywords) plt.savefig(path) plt.close(gfile) # GRAPHER def create_graph_trend(trend, path, settings): """ Creates a graph representation out of data produced from get_event_trend. @param trend: The trend data @type trend: [(str, str|int|(str|int,...))] @param path: Where to store the graph @type path: str @param settings: Dictionary of graph parameters @type settings: dict """ # If no input, we don't bother about anything if len(trend) == 0: return # If no filename is given, we'll assume STD-out format and ASCII. 
if path == '': settings["format"] = 'asciiart' if settings["format"] == 'asciiart': create_graph_trend_ascii_art(trend, path, settings) elif settings["format"] == 'gnuplot': create_graph_trend_gnu_plot(trend, path, settings) elif settings["format"] == "flot": create_graph_trend_flot(trend, path, settings) def create_graph_trend_ascii_art(trend, path, settings): """Creates the graph trend using ASCII art""" out = "" if settings["multiple"] is not None: # Tokens that will represent the different data sets (maximum 16 sets) # Set index (=100) to the biggest of the histogram sums index = max([sum(x[1]) for x in trend]) # Print legend box out += "Legend: %s\n\n" % ", ".join(["%s (%s)" % x for x in zip(settings["multiple"], WEBSTAT_GRAPH_TOKENS)]) else: index = max([x[1] for x in trend]) width = 82 # Figure out the max length of the xtics, in order to left align xtic_max_len = max([len(_to_datetime(x[0]).strftime( settings["xtic_format"])) for x in trend]) for row in trend: # Print the xtic xtic = _to_datetime(row[0]).strftime(settings["xtic_format"]) out_row = xtic + ': ' + ' ' * (xtic_max_len - len(xtic)) + '|' try: col_width = (1.0 * width / index) except ZeroDivisionError: col_width = 0 if settings["multiple"] is not None: # The second value of the row-tuple, represents the n values from # the n data sets. Each set, will be represented by a different # ASCII character, chosen from the randomized string # 'WEBSTAT_GRAPH_TOKENS'. # NOTE: Only up to 16 (len(WEBSTAT_GRAPH_TOKENS)) data # sets are supported. total = sum(row[1]) for i in range(len(row[1])): col = row[1][i] try: out_row += WEBSTAT_GRAPH_TOKENS[i] * int(1.0 * col * col_width) except ZeroDivisionError: break if len([i for i in row[1] if type(i) is int and i > 0]) - 1 > 0: out_row += out_row[-1] else: total = row[1] try: out_row += '-' * int(1.0 * total * col_width) except ZeroDivisionError: break # Print sentinel, and the total out += out_row + '>' + ' ' * (xtic_max_len + 4 + width - len(out_row)) + str(total) + '\n' # Write to destination file if path == '': print out else: open(path, 'w').write(out) def create_graph_trend_gnu_plot(trend, path, settings): """Creates the graph trend using the GNU plot library""" try: import Gnuplot except ImportError: return gnup = Gnuplot.Gnuplot() gnup('set style data linespoints') if 'size' in settings: gnup('set terminal png tiny size %s' % settings['size']) else: gnup('set terminal png tiny') gnup('set output "%s"' % path) if settings["title"] != '': gnup.title(settings["title"].replace("\"", "")) if settings["xlabel"] != '': gnup.xlabel(settings["xlabel"]) if settings["ylabel"] != '': gnup.ylabel(settings["ylabel"]) if settings["xtic_format"] != '': xtics = 'set xtics (' xtics += ', '.join(['"%s" %d' % (_to_datetime(trend[i][0], '%Y-%m-%d \ %H:%M:%S').strftime(settings["xtic_format"]), i) for i in range(len(trend))]) + ')' gnup(xtics) gnup('set format y "%.0f"') # If we have multiple data sets, we need to do # some magic to make Gnuplot eat it, # This is basically a matrix transposition, # and the addition of index numbers. 
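# One Gnuplot data set is built per column of the trend tuples; the y range is adjusted to the overall min/max of all sets.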
if settings["multiple"] is not None: cols = len(trend[0][1]) rows = len(trend) plot_items = [] y_max = 0 y_min = 0 for col in range(cols): data = [] for row in range(rows): data.append([row, trend[row][1][col]]) plot_items.append(Gnuplot.PlotItems .Data(data, title=settings["multiple"][col])) tmp_max = max([x[col] for x in data]) tmp_min = min([x[col] for x in data]) if tmp_max > y_max: y_max = tmp_max if tmp_min < y_min: y_min = tmp_min if y_max - y_min < 5 and y_min != 0: gnup('set ytic %d, 1, %d' % (y_min - 1, y_max + 2)) elif y_max < 5: gnup('set ytic 1') gnup.plot(*plot_items) else: data = [x[1] for x in trend] y_max = max(data) y_min = min(data) if y_max - y_min < 5 and y_min != 0: gnup('set ytic %d, 1, %d' % (y_min - 1, y_max + 2)) elif y_max < 5: gnup('set ytic 1') gnup.plot(data) def create_graph_trend_flot(trend, path, settings): """Creates the graph trend using the flot library""" out = """ """ open(path, 'w').write(out) def create_graph_table(data, path, settings): """ Creates a html table representation out of data. @param data: The data @type data: (str,...) @param path: Where to store the graph @type path: str @param settings: Dictionary of table parameters @type settings: dict """ out = """ """ if settings['rows'] == []: for row in data: out += """ """ for value in row: out += """ """ % value out += "" else: for dta, value in zip(settings['rows'], data): out += """" out += "
      %s
      %s """ % dta for vrow in value: out += """%s
      """ % vrow out = out[:-6] + "
      " open(path, 'w').write(out) def create_graph_dump(dump, path): """ Creates a graph representation out of data produced from get_event_trend. @param dump: The dump data @type dump: [(str|int,...)] @param path: Where to store the graph @type path: str """ out = "" if len(dump) == 0: out += "No actions for this custom event " + \ "are registered in the given time range." else: # Make every row in dump equally long, insert None if appropriate. max_len = max([len(x) for x in dump]) events = [tuple(list(x) + [None] * (max_len - len(x))) for x in dump] cols = ["Event", "Date and time"] + ["Argument %d" % i for i in range(max_len - 2)] column_widths = [max([len(str(x[i])) \ for x in events + [cols]]) + 3 for i in range(len(events[0]))] for i in range(len(cols)): out += cols[i] + ' ' * (column_widths[i] - len(cols[i])) out += "\n" for i in range(len(cols)): out += '=' * (len(cols[i])) + ' ' * (column_widths[i] - len(cols[i])) out += "\n\n" for action in dump: for i in range(len(action)): if action[i] is None: temp = '' else: temp = action[i] out += str(temp) + ' ' * (column_widths[i] - len(str(temp))) out += "\n" # Write to destination file if path == '': print out else: open(path, 'w').write(out) # EXPORT DATA TO SLS def get_search_frequency(day=datetime.datetime.now().date()): """Returns the number of searches performed in the chosen day""" searches = get_keyevent_trend_search_type_distribution(get_args(day)) return sum(searches[0][1]) def get_total_records(day=datetime.datetime.now().date()): """Returns the total number of records which existed in the chosen day""" tomorrow = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime("%Y-%m-%d") args = {'collection': CFG_SITE_NAME, 't_start': day.strftime("%Y-%m-%d"), 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d"} try: return get_keyevent_trend_collection_population(args)[0][1] except IndexError: return 0 def get_new_records(day=datetime.datetime.now().date()): """Returns the number of new records submitted in the chosen day""" args = {'collection': CFG_SITE_NAME, 't_start': (day - datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 't_end': day.strftime("%Y-%m-%d"), 'granularity': "day", 't_format': "%Y-%m-%d"} try: return (get_total_records(day) - get_keyevent_trend_collection_population(args)[0][1]) except IndexError: return 0 def get_download_frequency(day=datetime.datetime.now().date()): """Returns the number of downloads during the chosen day""" return get_keyevent_trend_download_frequency(get_args(day))[0][1] def get_comments_frequency(day=datetime.datetime.now().date()): """Returns the number of comments during the chosen day""" return get_keyevent_trend_comments_frequency(get_args(day))[0][1] def get_loans_frequency(day=datetime.datetime.now().date()): """Returns the number of comments during the chosen day""" return get_keyevent_trend_number_of_loans(get_args(day))[0][1] def get_web_submissions(day=datetime.datetime.now().date()): """Returns the number of web submissions during the chosen day""" args = get_args(day) args['doctype'] = 'all' return get_keyevent_trend_web_submissions(args)[0][1] def get_alerts(day=datetime.datetime.now().date()): """Returns the number of alerts during the chosen day""" args = get_args(day) args['cols'] = [('', '', '')] args['event_id'] = 'alerts' return get_customevent_trend(args)[0][1] def get_journal_views(day=datetime.datetime.now().date()): """Returns the number of journal displays during the chosen day""" args = get_args(day) args['cols'] = [('', '', '')] args['event_id'] 
= 'journals' return get_customevent_trend(args)[0][1] def get_basket_views(day=datetime.datetime.now().date()): """Returns the number of basket displays during the chosen day""" args = get_args(day) args['cols'] = [('', '', '')] args['event_id'] = 'baskets' return get_customevent_trend(args)[0][1] def get_args(day): """Returns the most common arguments for the exporting to SLS methods""" return {'t_start': day.strftime("%Y-%m-%d"), 't_end': (day + datetime.timedelta(days=1)).strftime("%Y-%m-%d"), 'granularity': "day", 't_format': "%Y-%m-%d"} # EXPORTER def export_to_python(data, req): """ Exports the data to Python code. @param data: The Python data that should be exported @type data: [] @param req: The Apache request object @type req: """ _export("text/x-python", str(data), req) def export_to_csv(data, req): """ Exports the data to CSV. @param data: The Python data that should be exported @type data: [] @param req: The Apache request object @type req: """ csv_list = [""""%s",%s""" % (x[0], ",".join([str(y) for y in \ ((type(x[1]) is tuple) and x[1] or (x[1], ))])) for x in data] _export('text/csv', '\n'.join(csv_list), req) def export_to_excel(data, req): """ Exports the data to Excel. @param data: The Python data that should be exported @type data: [] @param req: The Apache request object @type req: """ if not xlwt_imported: raise Exception("Module xlwt not installed") book = xlwt.Workbook(encoding="utf-8") sheet1 = book.add_sheet('Sheet 1') for row in range(0, len(data)): for col in range(0, len(data[row])): sheet1.write(row, col, "%s" % data[row][col]) filename = CFG_TMPDIR + "/webstat_export_" + \ str(time.time()).replace('.', '') + '.xls' book.save(filename) redirect_to_url(req, '%s/stats/export?filename=%s&mime=%s' \ % (CFG_SITE_URL, os.path.basename(filename), 'application/vnd.ms-excel')) # INTERNAL def _export(mime, content, req): """ Helper function to pass on the export call. Create a temporary file in which the content is stored, then redirect to the export web interface. """ filename = CFG_TMPDIR + "/webstat_export_" + \ str(time.time()).replace('.', '') open(filename, 'w').write(content) redirect_to_url(req, '%s/stats/export?filename=%s&mime=%s' \ % (CFG_SITE_URL, os.path.basename(filename), mime)) def _get_trend_from_actions(action_dates, initial_value, t_start, t_end, granularity, dt_format): """ Given a list of dates reflecting some sort of action/event, and some additional parameters, an internal data format is returned. 'initial_value' set to zero, means that the frequency will not be accumulative, but rather non-causal. @param action_dates: A list of dates, indicating some sort of action/event. @type action_dates: [datetime.datetime] @param initial_value: The numerical offset the first action's value should make use of. @type initial_value: int @param t_start: Start time for the time domain in format %Y-%m-%d %H:%M:%S @type t_start: str @param t_end: End time for the time domain in format %Y-%m-%d %H:%M:%S @type t_end: str @param granularity: The granularity of the time domain, span between values. Possible values are [year,month,day,hour,minute,second].
@type granularity: str @param dt_format: Format of the 't_start' and 't_end' parameters @type dt_format: str @return: A list of tuples zipping a time-domain and a value-domain @type: [(str, int)] """ # Insert the maximum date as a sentinel indicating we're done action_dates.insert(0, datetime.datetime.max) # Create an iterator running from the first day of activity dt_iter = _get_datetime_iter(t_start, granularity, dt_format) # Construct the datetime tuple for the stop time stop_at = _to_datetime(t_end, dt_format) - datetime.timedelta(seconds=1) # If our t_start is more recent than the initial action_dates, we need to # drop those. t_start_dt = _to_datetime(t_start, dt_format) while action_dates[-1] < t_start_dt: action_dates = action_dates[:-1] vector = [(None, initial_value)] # pylint: disable=E1101 old = dt_iter.next() # pylint: enable=E1101 upcoming_action = action_dates.pop() for current in dt_iter: # Count the action_dates in the current span; set the initial value to # zero to avoid accumulation. if initial_value != 0: actions_here = vector[-1][1] else: actions_here = 0 # Check to see if there's an action date in the current span while old <= upcoming_action < current: actions_here += 1 try: upcoming_action = action_dates.pop() except IndexError: upcoming_action = datetime.datetime.max vector.append((old.strftime('%Y-%m-%d %H:%M:%S'), actions_here)) old = current # Make sure to stop the iteration at the end time if current > stop_at: break # Remove the first bogus tuple, and return return vector[1:] def _get_datetime_iter(t_start, granularity='day', dt_format='%Y-%m-%d %H:%M:%S'): """ Returns an iterator over datetime elements starting at an arbitrary time, with granularity of a [year,month,day,hour,minute,second]. @param t_start: An arbitrary starting time in format %Y-%m-%d %H:%M:%S @type t_start: str @param granularity: The span between iterable elements, default is 'day'. Possible values are [year,month,day,hour,minute,second]. @type granularity: str @param dt_format: Format of the 't_start' parameter @type dt_format: str @return: An iterator of points in time @type: iterator over datetime elements """ tim = _to_datetime(t_start, dt_format) # Make a time increment depending on the granularity and the current time # (the length of years and months vary over time) span = "" while True: yield tim if granularity == "year": span = (calendar.isleap(tim.year) and ["days=366"] or ["days=365"])[0] elif granularity == "month": span = "days=" + str(calendar.monthrange(tim.year, tim.month)[1]) elif granularity == "day": span = "days=1" elif granularity == "hour": span = "hours=1" elif granularity == "minute": span = "minutes=1" elif granularity == "second": span = "seconds=1" else: # Default just in case span = "days=1" tim += eval("datetime.timedelta(" + span + ")") def _to_datetime(dttime, dt_format='%Y-%m-%d %H:%M:%S'): """ Transforms a string into a datetime """ return datetime.datetime(*time.strptime(dttime, dt_format)[:6]) def _run_cmd(command): """ Runs a certain command and returns the string output. If the command is not found a string saying so will be returned. Use with caution! @param command: The UNIX command to execute. @type command: str @return: The std-out from the command.
@type: str """ return commands.getoutput(command) def _get_doctypes(): """Returns all the possible doctypes of a new submission""" doctypes = [("all", "All")] for doctype in get_docid_docname_alldoctypes(): doctypes.append(doctype) return doctypes def _get_item_statuses(): """Returns all the possible status of an item""" return [("available", "Available"), ("requested", "Requested"), ("on loan", "On loan"), ("missing", "Missing")] def _get_item_doctype(): """Returns all the possible types of document for an item""" dts = [] for dat in run_sql("""SELECT DISTINCT(request_type) FROM crcILLREQUEST ORDER BY request_type ASC"""): dts.append((dat[0], dat[0])) return dts def _get_request_statuses(): """Returns all the possible statuses for an ILL request""" dts = [] for dat in run_sql("SELECT DISTINCT(status) FROM crcILLREQUEST ORDER BY status ASC"): dts.append((dat[0], dat[0])) return dts def _get_libraries(): """Returns all the possible libraries""" dts = [] for dat in run_sql("SELECT name FROM crcLIBRARY ORDER BY name ASC"): dts.append((dat[0], dat[0])) return dts def _get_loan_periods(): """Returns all the possible loan periods for an item""" dts = [] for dat in run_sql("SELECT DISTINCT(loan_period) FROM crcITEM ORDER BY loan_period ASC"): dts.append((dat[0], dat[0])) return dts def _get_tag_name(tag): """ For a specific MARC tag, it returns the human-readable name """ res = run_sql("SELECT name FROM tag WHERE value LIKE '%%%s%%'" % (tag)) if res: return res[0][0] res = run_sql("SELECT name FROM tag WHERE value LIKE '%%%s%%'" % (tag[:-1])) if res: return res[0][0] return '' diff --git a/modules/websubmit/doc/admin/websubmit-admin-guide.webdoc b/modules/websubmit/doc/admin/websubmit-admin-guide.webdoc index 860e3e774..1ec3e0e9c 100644 --- a/modules/websubmit/doc/admin/websubmit-admin-guide.webdoc +++ b/modules/websubmit/doc/admin/websubmit-admin-guide.webdoc @@ -1,3777 +1,3777 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

Disclaimer: Though this guide provides all the necessary information to start learning WebSubmit from scratch and to reach a good level in administrating it, it is not yet fully complete, and might contain information that has not been strictly verified (sample code, e.g., is provided "AS IS", only to offer some guidance to the admin).
Some topics, such as HOW-TOs and sample workflows (e.g. approval workflows and referee management), deserve fuller treatment. At this point the demo submissions that come standard with the Atlantis demo site remain essential companions to this guide.
Contributions are welcome (e.g. sample workflows, function descriptions, etc.)

      Contents

          (Check out the old WebSubmit admin guide)

      1. Overview

      1.1 How WebSubmit Works

WebSubmit provides the infrastructure to set up customized pages for your users to submit new metadata and files to your repository. It is highly flexible, in order to accommodate the various types of documents that you might need to archive. As a consequence of this flexibility, it requires a good understanding of the concepts behind WebSubmit.

A simplified schema of a typical WebSubmit workflow is the following (figure 1): one or several pages are presented to the user to enter some information, such as the title of the document, the authors, etc. Each of these pages contains a form with one or several WebSubmit elements, which can either display some information to the user, or ask for input from the user. WebSubmit elements are described in more detail further below. After the user has finished filling in the forms on all pages, a series of WebSubmit functions is called. These functions will typically (1) post-process the data, (2) create a MARCXML file, (3) upload the created MARCXML file, and (4) send confirmation email(s).

One thing worth learning from this simple workflow is that (1) functions are executed one after the other, (2) each function can have side effects, such as sending email, and (3) the output of these functions is displayed to the user. Typical submissions use many side-effect functions, and only one function that gives some feedback to the user (in the form of a web page). Also, most submissions usually need only a single page.

Finally, note that you can plug checks onto each field of a page, so that the user cannot proceed further if some invalid text has been input.

Functions are also organized in steps (Figure 2). By default, WebSubmit runs the "step 1 block" and then stops: to run the next steps one must have a function at the end of step 1 that jumps to another block (e.g. the CaseEDS function), or have a WebSubmit element that sets the input form "step" to the value of the block number (as is done, for example, in combination with the "Create_Modify_Interface" function).

A set of WebSubmit functions comes installed by default in Invenio, to provide all the necessary functionality to create your own workflow. The behaviour of these functions can be customized through their parameters. Advanced users can also create their own functions in Python (see further below in this guide). The main difficulty for beginners is to pick the adequate functions and carefully choose their ordering. It is recommended to get inspiration from the sample demo submissions at first.

It is particularly important at this point to understand that the WebSubmit engine more or less limits itself to (1) displaying pages to collect data, and (2) running functions. It does not take care of building a record and inserting it into a collection, but expects a set of functions configured to do so.

Such a multi-page submission could appear to users as shown in figure 4. Note that this figure shows a special page 0. This "cover" page is mandatory for all submissions, and is automatically generated by WebSubmit. It can be customized to (1) display a description of the submission, (2) show the available "actions" (described further below) and (3) let the users choose among the available "categories" (described further below).

      Page "0"   Page 1   Page 2   Functions ouput

Indeed, typical submissions contain not only one, but several independent workflows called "actions": one action might be dedicated to the submission of a document, while another one will let the user modify a previously submitted record. Different actions can therefore display different sets of pages and call different post-processing functions. The first page of a submission (page "0") will let users choose among the offered actions.
By convention we use 3-letter names for the actions of a submission. For example:

      • SBI: submit a new record
      • MBI: modify the metadata of a record
      • SRV: submit a revised file

      Actions are displayed as several buttons (blue by default) for users to choose from to start a new submission (Figure 6):

Figure 6 also shows the possibility to select among various categories prior to jumping into one of the available actions. These categories usually don't have a direct impact on the chosen workflow. Think of them simply as a WebSubmit element placed on the first page, common to all the actions of your submission (indeed, you could set up your submissions to have such categories inside your submission action pages, but that would require additional work).

      Last, but not least, a submission is usually referred to by a short name (at most 5 letters), reused in many places in the WebSubmit admin interface.

      To summarize:
      • A submission is made of different actions
      • An action is a workflow made of pages, checks and a flow of functions.
      • A page contains several WebSubmit elements, usually input elements with some label.
      • A WebSubmit element is a control on the interface to input or display values.
• Javascript checks can be attached to WebSubmit elements, in order to validate the input data before going to a further step of the submission.
      • A function performs some post-processing operations, usually on data collected thanks to WebSubmit elements. Functions can have side-effects and outputs
• Functions are organized in steps, i.e. blocks of functions

Another concept remains to be explained, but this functionality tends to disappear from submissions, and might be deprecated at some point. The explanation below is provided only for completeness; it is strongly discouraged to go that way:

      It is possible to group actions in sets: an action set is a succession of actions which should be done in a given order when a user starts a submission.
      For example the submission of a document can be composed of two actions: Submission of Bibliographic Information (SBI) and Fulltext Transfer (FTT), which should be done one after the other.
      When the user starts the submission, we want it to take him first through SBI and, once he finishes SBI, to carry him on to FTT. SBI and FTT are in this case in the same action set. They will both have a level of 1 ("level" is a bad name; it should be "action set number"), SBI will have a score of 1, and FTT a score of 2 (which means it will be started after SBI). If you set the stpage of FTT to 2, the user will be carried directly to the 2nd page of the FTT web form; this value is usually set to 1.
      The endtxt field contains the text which will be displayed to the user at the end of the first action (here it could be "you now have to transfer your files").
      A single action like "Modify Bibliographic Information" should have the three columns set to 0, 0 and 1.
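
      For illustration, and assuming the three columns mentioned above are the level, score and stpage fields, the example would translate to the following values (purely illustrative):

       Action   level   score   stpage
       SBI      1       1       1
       FTT      1       2       1
       MBI      0       0       1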

      1.2 Behind the scenes

      This section highlights a few key behaviours of WebSubmit which are particularly important to understand when designing a submission.

      When a user starts a new submission, a working directory is created on disk in order to store all the collected values. This working directory is usually called the "curdir". It is located in a subdirectory of /opt/invenio/var/data/submit/storage/{action directory}/{submission code}/{submission access number}, where {submission code} is the short name of a submission and {submission access number} is a unique submission session identifier (displayed on the web submission interface as the submission number). {action directory} is "running" for SBI actions, "modify" for MBI actions, "revise" for SRV actions, etc. (this is configured in the "Actions" part of the WebSubmit admin interface).
      Whenever the user moves from one page to the next, or submits the form, the curdir is populated with files named after the submission elements displayed on the page, their content being the user-inserted values (user-uploaded files can be found by default in the curdir/files/ directory). It is these files that WebSubmit functions such as "Create_Record" or "Modify_Record" will use in order to create the MARCXML to upload (note that the output of these functions will be a file named "recmysql" in the curdir, containing the MARCXML to upload).

      The curdir contains a few other additional files:

      • function_log: the list of functions called by the WebSubmit engine
      • SuE: the email of the submitter
      • doctype: the short name (code name) of the current submission
      • act: the current action (SBI, MBI, etc.)
      • step: the current step of the functions workflow
      • curpage: the current page of the submission
      • ln: the language chosen by the user to display the web interface
      • combo{doctype}: for eg. comboDEMOART contains the category chosen on page "0".
      • etc.
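
      For illustration, a WebSubmit function or response element can read these values directly from disk. A minimal sketch, assuming a hypothetical element named DEMO_TITLE (curdir is passed to WebSubmit functions as a parameter, and is available as a global variable inside response elements):

       import os

       def read_curdir_value(curdir, name):
           """Return the value stored in the curdir for the given element,
           or '' if the corresponding file does not exist."""
           path = os.path.join(curdir, name)
           if not os.path.exists(path):
               return ''
           fd = open(path)
           try:
               return fd.read().strip()
           finally:
               fd.close()

       # For eg.:
       # submitter_email = read_curdir_value(curdir, 'SuE')
       # title = read_curdir_value(curdir, 'DEMO_TITLE')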

      The path to the curdir can therefore be slightly different depending on the chosen action: for eg. the SRV action will use /opt/invenio/var/data/submit/storage/revise/{submission code}/{submission access number}, where {submission code} is again the short name of the submission.

      When the functions run, they will most probably create additional files, such as "SN" created by the "Create_Recid" function which reserves a record id, "RN" created by the "Report_Number_Generation" function to reserve a report number, or the "recmysql" file already mentioned above. Many of these output files then become input parameters for the next functions to be executed. This shows the importance of running a well-defined set of functions in a well-defined order.

      The curdir is not removed after the end of the submission. This gives you the opportunity to keep track of past submissions in case something goes wrong. However, the "Move_to_Done" function will create a zipped archive of this directory (and rename it using the report number of the record, found in the file curdir/RN), and will move it to a different directory, /opt/invenio/var/data/submit/storage/done/running/.

      2. Configure Submissions: a Tutorial

      This chapter is a quick walkthrough for creating your submission. It is not trying to explain everything, but simply goes through the main steps necessary to configure a submission.

      2.1 Creating the submission

      1. Go to the WebSubmit admin interface and click on the "Add New Doctype" button at the bottom of the screen. Give your submission an ID (eg. DEMOTEST; this cannot be changed later and should be kept short, as it is used in URLs to link to your submission), a name and a description. The name and the description will be displayed on the user's end. The description can contain HTML markup. You can also choose to clone from an already existing submission so that the existing configuration for pages, functions, elements, etc. is copied over to your new submission (this might not be wanted if the submission you copy from includes submission-specific elements).

      2. From the submission page, select from the "Add a new Submission" menu the action to add to your newly created submission. For eg. select "[SBI] Submit New Record" to create an action that will allow users to submit new documents. Press the "Add Submission" button to add the chosen action. You are given the possibility to clone the configuration from another existing submission. Start with a blank one or choose an existing one, then press "Continue".

      3. On the following page, fill in the form:

      • Choose if the action is to be displayed on the start page of your submission.
      • Specify the "button order" on the interface for this action (to define an order between your actions buttons on splash page (page 0) of your submission).
      • Enter the "Status Text": not really used in user interface (to be checked): label for your action in the WebSubmit admin interface.
      • Other fields are related to action sets (chained actions). It is recommended to leave the default values.
        • Input the "End text": text displayed at the end of the previous action to link to this one, provided that this action is chained to another (leaving empty is recommended).
        • Choose the "Stpage": the page number that should be used as starting point when reaching this action from another chained action: leaving '0' is recommended).
        • The "level": the group of actions to which this one belongs, in case it is chained with another action(s) (leaving emtpy is recommended).
        • The "score": the order in which grouped actions are chained (leaving empty is recommended).
      Once done, press the "Save Details" button.

      4. (Optional) Repeat steps 2 and 3 for any other workflow you want to support in your submission. If the action you want to add is not part of the list, click on the available actions menu, press the "Add Action" button and enter the "action code" (for eg. SBI), "description" (displayed as the page title when going through the submission pages), "dir" (in which subdirectory of the default base submission folder the running/done submissions for this action will be saved, for eg. submit), and "status text" (displayed as the label for the action button on the main submission interface). Press Save Details, and you are ready to use this action.

      5. (Optional) To propose a list of categories on the splash page (page 0) of your submission, select your submission from the main WebSubmit admin interface, scroll down to the "Categories" section on the page, enter a new category, with "ID" being the key code of the new category you want to add (this value will be saved in the corresponding file in curdir directory of your submission. Reminder: the file in curdir containing this value will be named comboDEMOTEST, provided that "DEMOTEST" is your submission ID) and "description" being the value displayed to the user for this category. Press "Add Category" to add the category.

      6. (Optional) To enter the list of persons that will be recognized as referees of a submission (for eg. by the "Is_Referee" function), select your submission from the main WebSubmit admin interface, scroll down to the "Manage Referees" section on the page, and click on the "Manage Referees" button.
      Select the user(s) from the list (users must have an account on the system), choose which category they manage, and click "Add". Once done, click "Finished".

      2.2 Building the interface

      1. Go to the main WebSubmit admin interface and select your submission. Choose the action (SBI, MBI, etc.) for which you want to build the interface and click on the corresponding "view interface" link.

      2. If you want to add a new page, click on the "Add a Page" button. Follow the "view page" link displayed next to the newly created page, or the one next to the page you want to modify.

      3. To add a new field on the page, press the "Add a Field" button (at the bottom of the screen). On the following page:

      • Select a field from the existing list of WebSubmit elements.
      • Enter a field label. It will be displayed just before the field on your page. The label can contain HTML. Note that this label will not be used in modification actions (MBI) built using the "Create_Modify_Interface" function. Instead, the "Modification Text" attribute of the element will be used.
      • Set if the field should be mandatory or not. Note that some elements (User Defined Input Elements, Hidden Input Elements and Response Elements) should never be set "mandatory".
      • Give a short description to the label. It will be used, for eg., to notify the user that the mandatory field named XXX has not been filled in.
      • Select a Javascript check from the list if you want to validate the content of the field according to some criteria.
      Once done, hit the "Add Field" button.
      Note that this step is simply instantiating a WebSubmit element to include on your page. If you want to include a field that does not exist in the available elements, you should first create it. Learn more about the creation of WebSubmit elements in the WebSubmit Elements chapter of this guide.

      4. Repeat step 3 as many times as needed. You can reorder the fields on the page, remove them or change their attributes. The "edit" link next to each field will let you change its attributes. The "element" link will however let you change the attributes of the WebSubmit element itself, i.e. affecting all the submissions having such a field on their page.

      5. You can preview the page by pressing the "View Page Preview" button at the top of the page. Note that Response Elements will however not be previewed.

      6. From the "page" interface you can go back successively to the action interface and the main submission interface by clicking on the "Finished" buttons at the bottom of the pages.

      2.3 Adding the functions

      1. Go to the main WebSubmit admin interface and select your submission. Choose the action (SBI, MBI, etc.) for which you want to set up the functions and click on the corresponding "view functions" link.

      2. To insert a function into the workflow, press the "Add a Function" button at the bottom of the screen. On the following page:

      • Select a function from the existing list of WebSubmit functions.
      • Enter the "Step" to which this function should be added (for eg. "1").
      • Enter the "Score" of the function, i.e. its order in the list of functions of the chosen step (for eg. 20). If a function already exists for the chosen score, functions will simply be shifted.
      Once done, hit the "Save Details" button.
      Note that this step is simply inserting an already existing WebSubmit function in your workflow. If you want to include a totally new function you should first create it. Learn more about the creation of WebSubmit functions in the WebSubmit Functions chapter of this guide.

      3. Once the function is inserted you can change its parameters by clicking on the "View parameters" link. Each function has a different set of parameters. Check the function documentation (available from the Available Functions menu of the WebSubmit admin interface) to learn more about the offered options.

      4. Repeat steps 2 and 3 as many times as needed. You can reorder the functions on the page or remove them.

      2.4 Restricting the submission

      Access to the submission interface is mostly restricted via the WebAccess module. You can check out the Access Restrictions chapter of this guide and refer to the WebAccess admin guide for detailed information.

      In addition to WebAccess you can use the following functions to restrict your submission:

      If you have set up an action that requires to modify an existing record (to add file, modify metadata, etc.) you can add the "Is_Original_Submitter" function in order to only let the original submitter of the record modify the record. This function must be added at the beginning of your list of functions (usually after the "Get_Recid" function), for each action, and each step. Check out the Adding the functions section of this guide to learn how to add this function to your workflow.

      You can also use the "User_is_Record_Owner_or_Curator" function to enable access to the original submitter of the record AND users connected to a specific WebAccess role.

      If you have set up an action (for eg. "APP") that requires to approve a document by a referee (defined in the list of referees for your submission) you can add the "Is_Referee" function in order to only let the referee go through. This function must be added at the beginning of your list of functions (usually after the "Get_Recid" function), for each action, and each step. Check out the Adding the functions section of this guide to learn how to add this function to your workflow.

      3. WebSubmit Elements

      WebSubmit elements are the building blocks of submission pages. This section focuses on how to use or create them. Refer to the overview of this guide to learn more about the concept of WebSubmit elements.

      3.1 Existing elements

      The list of existing elements can be found in the "available elements" section of the WebSubmit admin interface. By default these elements are instances used in the demo submissions. You can reuse them, but it is recommended to create new elements to use in your own submissions, except for complex "response" elements that are generic enough.

      Once instantiated for a submission, elements become fields on the submission page. It is important to distinguish between the field attributes, which are submission-specific, and the element attributes, which apply to all submissions using the element.

      3.2 Creating a new element

      This section describes the creation of a customized element. It does not show how to add an already existing element to your submission. Refer to the Tutorial to learn how to add an existing element to your submission.

      To create a new element, go to the "available elements" section of the WebSubmit admin interface, scroll down to the bottom of the page and press the "Add New Element" button.

      Fill in the form:

      • Element Name: The name of the element (Eg: DEMO_TITLE)
      • Modification Text: The prefix to be used when the element is used by the "Create_Modify_Interface" function (i.e. in MBI actions)
      • Element Type: The type of element:
        • User Defined Input: the element is a static area displaying the content of the field "Element Description". The content must be HTML-escaped (or can be HTML).
        • File Input: the element is a basic control to upload files
        • Hidden Input: the element is a hidden form input field, and its value is the one defined in the "Value" field (below).
        • Text Input: the element is a simple text field. Initial value is the one defined in the "Value" field.
        • Response: the element executes the Python code from the "Element Description" field. The code is executed at runtime when displaying the page. The element output consists of the value assigned to the variable "text" in the scope of this field at the end of the execution of the element.
        • Select Box: a list control. The full HTML code of the list must be given in the "Element Description" field. For eg:
           <select name="DEMO_LANG">
                   <option value="eng">English</option>
                   <option value="fre">French</option>
                   <option value="ger">German</option>
           </select>
           
          The submitted value will be the one defined in the "value" parameter.
        • Text Area Element: An HTML text area field.
      • Marc Code: the MARC code from which the value could be retrieved when the element is used by the "Create_Modify_Interface" function (i.e. in MBI actions)
      • Size: The size of the text input field (for "Text Input" Element Types)
      • No. Rows: The number of rows for "Text Area" Element Types
      • No. Columns: The number of columns for "Text Area" Element Types
      • Maximum Length: The maximum length (in characters) for "Text Input" Element Types. Note that it only sets a limit in the user's browser; it is not checked server-side.
      • Value: The initial value for "Text Input" or "Hidden Input" elements
      • Element Description: The content/code for "User Defined Input", "Select Box" and "Response" elements

      Once done, hit the "Save Details" button. You are done with the creation of your element. You can then add it to your submission page.

      3.2.1 User Defined Input Elements

      This element simply displays the content defined in the field "Element Description". The content must be HTML-escaped (or can be HTML). This element is not suitable for user-input values.

      3.2.2 File Input Elements

      The element displays a basic control to upload files. The file uploaded with this element can be found upon submission inside [..]/files/ELEMENT_NAME/ (where ELEMENT_NAME is your element name, for eg. DEMOART_FILE) within the submission directory.

      You can then further process the uploaded file with relevant WebSubmit functions (eg. stamp the file), and attach it to the record (see section 5. File Management with WebSubmit of this guide).

      3.2.3 Hidden Input Elements

      Simply creates a hidden input field, with the value defined in the "Value" field of the element. The uploaded value can be found, as for any other element, in the submission directory upon submission of the form.

      The main usage of this field is to upload a statically defined value, in order to check if the form has already been submitted. Static values that are to be part of the record are better defined in the BibConvert configuration file used to create the record.

      3.2.4 Text Input Elements

      A simple text input field. Nothing much to say about it, except that it is usually the most used of all elements.

      3.2.5 Response Elements

      Response elements are elements evaluated at runtime, which execute the Python code they embed. These elements are useful when you need to display complex controls that are not supported by default by WebSubmit, or if you need to generate content dynamically. The output of a response element (displayed on the submission form) is the value assigned to the "text" variable at the end of the execution.
      For eg. to display a radio button one would write:

       # Sample radio button element
       text = ""
       options = [1, 2, 3]
       for option in options:
           text += '<input type="radio" name="group1" id="%(opt)i" value="%(opt)i"><label for="%(opt)i">Option %(opt)i</label>' % {'opt': option}
       
      which would display as a group of three radio buttons labelled "Option 1" to "Option 3".

      Upon submission of the form, a file named "group1" would in that case be created in the submission directory, containing the chosen value.

      Response elements "magically" have access to some global variables, provided that they have been set at the moment of executing the element:

      • sysno: the current record id
      • rn: the current report number
      • act: the current submission action (SBI, MBI, etc.)
      • curdir: the path of the current submission directory
      • uid: the user ID of the current submitter
      • uid_email: the email of the current submitter

      When defining a response element you should be aware of a few traps:

      • You must expect that the page can be reloaded. In that case, possible actions performed by your element should not be done twice. You also have to take care of the displayed state of your element. For eg. a list generated by a response element should not reset to the default value when the page refreshes if the user has already chosen a custom value. You can take care of this by reading the corresponding file in the submission directory.
      • When used in MBI (modify) actions with the "Create_Modify_Interface" function (which takes care of building the modification form by mirroring the page defined for the initial submission, SBI), you should read the initial state from the record (if defined in the record), or from the curdir if the page is refreshed.
      • You should never specify a response element as "mandatory" when including it on your page.
      A possible skeleton for a response element could be:

       import os
       from invenio.websubmit_functions.ParamFile import ParamFromFile
      -from invenio.search_engine import get_fieldvalues
      +from invenio.search_engine_utils import get_fieldvalues
       
       this_element_name = "DEMOART_TEST" # This would be your element name
       
       if act == "SBI" and not os.path.exists(os.path.join(curdir, this_element_name)):
           # Set initial value in case user has not already chosen one.
           # This is only needed if you want to set a non-empty default
           # value, for eg. retrieved from a remote service, etc.
           # Otherwise the 'else' statement would be enough.
           default_value = "A default value" # or any default value
       elif act == "MBI" and not os.path.exists(os.path.join(curdir, this_element_name)):
           # We are displaying a modification interface for the first time:
           # Read initial value from the record for eg.
           default_value = get_fieldvalues(sysno, '245__a')
       else:
           # Get user chosen value in case page is reloaded.
           # The ParamFromFile() returns empty string if value was not set,
           # so this is also suitable for setting empty initial values.
           default_value = ParamFromFile(os.path.join(curdir, this_element_name))
       
       # Do something ...
       text = '<input type="text" name="%s" value="%s"/>' % (this_element_name, default_value)
       

      Since response elements need the submission context and can possibly have side effects, they are never executed when previewing your submission pages from the WebSubmit admin interface.

      3.2.6 Select Box Elements

      Select Box elements are used to display list menus (either as a dropdown menu or a multiple-selection list). The element is not smart enough to save you from specifying the HTML markup of the list, but will at least set the right initial state when reloading the submission page or when used in MBI actions.

      You would for eg. define the following "description" for an element displaying a list of languages:

       <select name="DEMOART_LANG">
               <option>Select:</option>
               <option value="eng">English</option>
               <option value="fre">French</option>
               <option value="ger">German</option>
               <option value="dut">Dutch</option>
       </select>
       

      In the above example a file named "DEMOART_LANG" will be created with the user chosen value (for eg. "ger") in the submission directory.

      Note that if you set the element as being "mandatory" on your page, the initial "Select:" value must be the first option of your list (you can otherwise specify the element as optional, and remove this item if wanted).

      3.3 Creating a new check

      When adding an existing element to your submission page you can associate a Javascript check with the element. You can choose from the existing ones or define your own check from the Available Checks menu of the WebSubmit admin interface.

      From the "Available Checks" page, select "Add check", give it a name and a "description": the description corresponds to the Javascript code to be executed to validate the form before submitting it. In this description you should define a Javascript function named after your check, that takes a string (the value to validate) as input. The function must then return 0 if the check fails (the form cannot be submitted) or 1 if the check passes. In addition you may want to raise an alert notifying the user about the error.

      For eg. to check that the number given in a field is smaller than 10, we create a "check" named Smaller_Ten:

       function Smaller_Ten(txt) {
           /* Check if input is strictly smaller than 10 */
       
           if (parseInt(txt) < 10 && parseInt(txt).toString()==txt) {
               // Note that parseInt('9a') returns 9, hence the '.toString()==txt' test.
               return 1;
           } else {
               alert("The given number is not smaller than 10! Please fix it.");
               return 0;
           }
       }
       

      4. WebSubmit Functions

      This section focuses on how to create new WebSubmit functions and use existing ones. To learn more about the concept of WebSubmit functions, read the Overview section of this guide.

      4.1 Existing functions

      The list of existing functions can be found in the "available functions" section of the WebSubmit admin interface. Click on "Edit Details" links to read more about the functions.

      You add existing functions to the functions list of each action (SBI, MBI, etc.) of your submission in order to post-process user-submitted values and build your customized workflow. Some functions have prerequisites on the order in which they are run and on the functions that must precede them. For eg. many functions expect the "Get_Recid" function to run before them. You can check the workflows provided with the Atlantis Demo installation for inspiration.

      4.2 Creating a new function

      This section describes the creation of a customized function. It does not show how to add an already existing function to your submission. Refer to the Tutorial to learn how to add an existing function to your submission.

      A WebSubmit function corresponds to a Python file, which must be named after the function name (eg "My_Function" => "My_Function.py") and placed into the /opt/invenio/lib/python/invenio/websubmit_functions/ directory. The file must also contain a Python function with the same name, "My_Function". The interface of this function must be the following:

       def My_Function(parameters, curdir, form, user_info=None):
       
      where
      • parameters: a dictionary containing the parameters and values that can be configured in the submission web interface.
      • curdir: the path to the current working directory.
      • form: the form passed to the current web page for possible reference from inside the function.
      • user_info: the user_info object representing the current user
      The values returned by the function are printed on the last submission page.

      For the function to be available from the WebSubmit admin interface, it must be specifically inserted from the admin interface. Scroll down to the bottom of the list, and press "Add New Function". Insert the function name, as well as all the wished parameters for the function.
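
      To make this concrete, here is a minimal sketch of such a function. The element name DEMO_TITLE and the output file DEMO_TITLE_UPPER are hypothetical, purely for illustration:

       import os

       def My_Function(parameters, curdir, form, user_info=None):
           """Read the value submitted via a hypothetical DEMO_TITLE element
           and store an uppercased copy of it in the curdir."""
           title = ""
           title_path = os.path.join(curdir, "DEMO_TITLE")
           if os.path.exists(title_path):
               fd = open(title_path)
               title = fd.read().strip()
               fd.close()
           fd = open(os.path.join(curdir, "DEMO_TITLE_UPPER"), "w")
           fd.write(title.upper())
           fd.close()
           # The returned value is printed on the last submission page
           return ""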

      5. File Management with WebSubmit

      This chapter introduces different strategies to enable file upload in WebSubmit submissions. You should already have a good understanding of how WebSubmit works before reading further. Some practice in WebSubmit submission implementation is also highly recommended in order to understand the techniques introduced below. To some extent, you might want to come back to this chapter only once you have already set up your submission and are about to implement file support, as the documentation below sometimes describes detailed implementation steps.

      Several techniques exist to handle files, accommodating various use cases. Read further below to choose the most appropriate technique based on your needs.

      5.1 File Input + FFT Technique

      The most "basic" way of letting your users submit files is to add a File Input element to your submission page(s), one for each possible file to upload, in the same way as you add other input fields.
      This technique is useful if you need to handle a well known number of files.

      Limitations:
      • incompatible with function "Move_to_Done", as the path in the FFT tag would be wrong.
      • revision of files requires well-defined filenames
      • cannot easily delete files
      • cannot easily support file attributes (description, restriction, name, etc.) modifications
      Procedure:

      1) You can reuse an already existing File Input element, or create your own. If you want to reuse an existing one, jump straight to point 3 below. Otherwise, head to the WebSubmit admin interface, select "6. Available Elements" in the menu, scroll down the opening page and hit "Add New Element" button.

      2) Choose a name for your new element (For e.g. "DEMO_FILE"). Select the "File Input" item of the "Element Type" menu. Once done, click on the "Save Details" button.

      3) Go to the main WebSubmit admin interface and select the submission you want to edit (for e.g. "DEMOART"), then action (for e.g. "SBI"), then the page. Scroll to the bottom of the page, and click on the "Add a Field" button.

      4) From the "Field Name" menu, select the desired input file element (for e.g. "DEMO_FILE", if you have created it in previous steps). Fill in the other usual fields, and click "Add Field". Reorder the elements on the page as needed.

      At this step your users will be able to upload a file to the server during the submission process. Indeed if you have a look at the corresponding submission directory in /opt/invenio/var/data/submit/storage/ you will see the uploaded file in the /files/DEMO_FILE/ directory, plus a standard DEMO_FILE file containing the path to the uploaded file. However the file is not attached to the uploaded record: you must add a corresponding entry in the BibConvert template, in a similar fashion as you would with other input fields.
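
      For illustration, a later WebSubmit function could locate the uploaded file through this pointer file. A minimal sketch, assuming the element is named DEMO_FILE:

       import os

       def locate_uploaded_file(curdir):
           """Return the path of the file uploaded via the hypothetical
           DEMO_FILE element, or None if nothing was uploaded."""
           pointer = os.path.join(curdir, "DEMO_FILE")
           if not os.path.exists(pointer):
               return None
           fd = open(pointer)
           path = fd.read().strip()
           fd.close()
           if path and os.path.exists(path):
               return path
           return None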

      5) Open your BibConvert "target" template used by the "Make_Record" or "Make_Modify_Record" function in your preferred editor. If you know where to find your BibConvert templates, jump to point 6. Otherwise continue reading: the BibConvert templates are used by "Make_Record" and "Make_Modify_Record" to create the MARCXML according to some specific rules. From your submission page, click on "view functions" of the action you want to edit, then "view parameters" of the Make_Record/Make_Modify_Record function. The "create/modifyTemplate" and "sourceTemplate" values are the names of the BibConvert templates, which you can find in the /opt/invenio/etc/bibconvert/config/ directory (depending on the permissions on disk, you might even be able to edit the files from the web interface). Read more about BibConvert in the BibConvert admin guide.

      6) Add an FFT tag to your target BibConvert template. FFT is a special tag interpreted by BibUpload in order to handle files. You will find an example below, but you can read more about the FFT syntax in the BibUpload admin guide.

       FFT::REPL(EOL,)---<datafield tag="FFT" ind1=" " ind2=" "><subfield code="a"><:curdir::curdir:>/files/DEMO_FILE/<:DEMO_FILE::DEMO_FILE:></subfield><subfield code="n">My File</subfield><subfield code="t">Main</subfield></datafield>
       

      The sample line above will rename the uploaded file to "My File", and then attach it to the record (once the created MARCXML has been uploaded with BibUpload). Note that you could keep the original name, name the file after the report number, specify a doctype such as "Main" or "additional", include a comment specified in another field, etc. Simply modify the FFT tag according to your needs. Note however that this technique will allow you to revise the file later only if you can identify it by a well-defined name. The above line also uploads the file with the doctype (category) "Main".

      7) One last thing not to forget is to add DEMO_FILE to the source BibConvert template, as you would for any other WebSubmit element. Open the source BibConvert template (which is also given as parameter to the Make_Record/Make_Modify_Record functions, and can be found in the /opt/invenio/etc/bibconvert/config/ directory), and add for example:

       DEMO_FILE---<:DEMO_FILE:>
       

      Repeat this procedure to add additional input file fields. It is perfectly ok to have several FFT field instances in the templates.

      Note that if one of the file input fields is left empty by the user, no file is uploaded and no DEMO_FILE file is created in the submission directory, but an erroneous FFT line is still inserted in the created output. This is why you might want to make all the File Input fields mandatory, or use the BibConvert MINLW(..) function to ensure that the field is created only if the output line is at least a given number of characters long (to be computed based on the default length of an empty line). This shows that this technique reaches its limits quite quickly in terms of flexibility.

      Revising/deleting files

      To revise files you would create a BibConvert template with the adequate FFT tag. We assume below that you set up the modification interface by using the Create_Modify_Interface function/technique, so that we can reuse the submission page set up for the "SBI" action. The key point is that the Input File element name is well known ("DEMO_FILE" in our case).

      1) Open your BibConvert "target" template used by the "Make_Modify_Record" function. Note that it should not be the same one as used in the "SBI" action of your submission, as it must create different outputs.

      2) Add an FFT tag to revise your file:

               <datafield tag="FFT" ind1=" " ind2=" ">
                   <subfield code="a"><:curdir::curdir:>/files/DEMO_FILE/<:DEMO_FILE::DEMO_FILE:></subfield>
       	    <subfield code="n">My File</subfield>
       	    <subfield code="d">KEEP-OLD-VALUE</subfield>
       	    <subfield code="z">KEEP-OLD-VALUE</subfield>
       	    <subfield code="r">KEEP-OLD-VALUE</subfield>
               </datafield>
       

      3) The above FFT will be uploaded with BibUpload in --correct mode, hence revising the file named "My File" with the new one. Note in this example the use of the special keyword KEEP-OLD-VALUE to keep the previous comment, description or restriction applied to the file, if any (so that the comment is not lost if, for e.g., you don't ask for a new one).

      You will notice the following limitation: you must be able to map the uploaded file to the target file to revise by its name. This means that you should be able to initially control your filename(s), for e.g. by having it fixed ("Main", "additional", "figure", etc) or guessable, for e.g. using the report number (<:DEMOART_RN::DEMOART_RN:>-main, <:DEMOART_RN::DEMOART_RN:>-additional).

      To circumvent this limitation (as well as the impossibility to delete files), you might combine this technique with one of the techniques described below (For eg: with the Move_Revised_Files_To_Storage function detailed in the Revising/deleting files section of the File Input element + Move_Files_To_Storage function technique)

      5.2 File Input element + Move_Files_To_Storage function

      This approach is similar to the technique described above. The main difference is that it leaves the job of actually uploading/revising the file(s) to a WebSubmit function, instead of the FFT in the uploaded MARCXML.

      Limitations:
      • revision of files requires a well-defined doctype. The consequence is that you can have only one file per doctype (1 "Main", 1 "Additional", etc.)
      • cannot easily delete files
      • does not support setting some additional file attributes (description, name, etc.)
      • uploaded doctypes must inherit the names of their File Input elements. For eg. "DEMO_FILE", instead of "Main", "Additional", "Figure", etc.

      1-4) Add a file input field to your submission page as described in the previous technique.

      As before, the file is uploaded to the server once the user ends the submission, but it is not attached to the created record. The solution is to rely on the "Move_Files_To_Storage" function:

      5) Add the "Move_Files_To_Storage" function to your submission functions. It is suggested to insert it after the function "Insert_Record".

      6) Configure the Move_Files_To_Storage function. The key parameter is paths_and_suffixes, which must contain your File Input element names, and possibly map to some suffixes to be added to the corresponding uploaded files.
      For example, add {'DEMO_FILE':'', 'DEMO_FILE2':'_additional'} to have the files uploaded with DEMO_FILE and DEMO_FILE2 elements attached to the record (with the DEMO_FILE2 filename suffixed with "_additional"). The paths_and_restriction works similarly to set the files restrictions.

      Each file is simply attached to the record, with its document type (doctype) being the name of your input file element (for e.g. file uploaded with the "DEMO_FILE" element is attached with document type "DEMO_FILE"). The filenames are kept.

      Revising/deleting files

      The "Move_Revised_Files_To_Storage" must be added to your modification workflow ("MBI"). It will use the file uploaded with your "DEMO_FILE" input element to revise the file with doctype "DEMO_FILE", the file from "DEMO_FILE2" input element to revise file with doctype "DEMO_FILE2", etc.

      1) Go to your modification workflow (MBI), and add Move_Revised_Files_To_Storage to your submission functions (usually after the "Insert_Modify_Record").

      2) Set up the elementNameToDoctype parameter of this function so it maps your File Input field name to the doctype to revise. For eg: "DEMO_FILE=Main" so that file uploaded using the DEMO_FILE input field will be used to replace the file with doctype "Main". This makes the assumption that you indeed previously uploaded (for eg. with an FFT during an SBI step) a file with this doctype.
      You can define several mappings, by using character | as separator. For eg: DEMO_FILE=Main|DEMO_FILE2=Additional.
      If you have initially uploaded your files with the Move_Files_To_Storage function, you will for eg. configure the parameter with "DEMO_FILE=DEMO_FILE", so that file uploaded with DEMO_FILE input field will replace the files that have been previously uploaded with doctype "DEMO_FILE".

      Note that function Move_Revised_Files_To_Storage can be used in combination with other techniques, as long as the mapping in elementNameToDoctype can be done unambiguously.

      Check the Move_Revised_Files_To_Storage function documentation for more detailed information.

      5.3 Create_Upload_Files_Interface + Move_Uploaded_Files_To_Storage functions

      This option offers a full-featured file manager, that can be easily configured to support file upload, revision, deletion, commenting, restrictions, etc. It can handle an "unlimited" number of files.

      The strategy consists of adding a WebSubmit function ("Create_Upload_Files_Interface") to your submission functions list, in order to display a file submission interface. The interface will therefore only show up after all the submission pages have been filled in and submitted. Once displayed, the interface lets the user upload new/revised files: the function refreshes the interface for each upload (it runs through the functions list again and stops on Create_Upload_Files_Interface). When the user applies the modifications, the submission "step" is incremented and the submission functions of step 2 are executed, skipping the display of the interface. In this step 2 you can perform the usual tasks of your submission. You must also add an additional function (Move_Uploaded_Files_To_Storage) to run at step 2 in order to attach the files that have been submitted at step 1.

      These functions are incompatible with function "Create_Modify_Interface". It is therefore suggested to create a dedicated submission action (in addition to "SBI" and "MBI") to let your users edit the files independently of the bibliographic data. An example of such setup can be found in DEMOPIC submission.

      Limitations:
      • Uses a WebSubmit function to draw the interface, which prevents the interface from being used inside a submission form (it is displayed at a later step). Not as integrated as a simple input file form element.
      • Requires Javascript to be enabled user-side (this applies to all submissions anyway).

      1) Go to your submission in WebSubmit admin, and add a new submission action (for e.g. "[SRV] Submit New File"). If necessary, create your own action in WebSubmit admin "Available WebSubmit Actions" page. You can clone from another existing action (in that case move to point 4 below), or simply create an empty action.

      2) Go to the new SRV action interface ("View Interface"), add a page, open it and add fields that will allow users to specify the record to update. Typically you will add a "DEMO_RN" field to enter the report number, and "DEMO_CONTINUE" button to submit the form.

      3) Go to the new SRV action functions ("View functions") and add the necessary functions: for e.g. at step 1, "Get_Report_Number", "Get_Recid" and "Create_Upload_Files_Interface"; at step 2, "Get_Recid", "Move_Uploaded_Files_to_Storage" and "Print_Success".

      4) Configure the Create_Upload_Files_Interface parameters. There are many options available. Briefly, the most important one is the "doctype" parameter, which lets you specify the document types users are allowed to submit. Use "|" to separate doctypes, and "=" to separate doctype and doctype description. For e.g. input "Main=Main File|Additional=Additional Document" to let users choose either Main or Additional types (which will show as "Main File" and "Additional Document" to users). Other parameters will let you define for which doctype users can revise or delete files (for e.g. specify for canDeleteDoctypes "Additional" so that only these documents can be deleted once they have been uploaded). Use "*" to specify "any declared doctype", and "|" as separator (for all can_*_doctypes parameters).

      To read more about the parameters available for this function, check the Create_Upload_Files_Interface function documentation.

      5) Configure the Move_Uploaded_Files_To_Storage function. There are fewer options than in the Create_Upload_Files_Interface function. Specify for e.g. in createIconDoctypes the doctypes for which icons will be created, or in "forceFileRevision" whether revisions of file attributes should trigger a new file revision. For up-to-date documentation check the Move_Uploaded_Files_to_Storage function documentation.

      Revising/deleting files

      File revisions and deletions come for free with these functions. Simply allow deletion or revision of files when configuring Create_Upload_Files_Interface.

      5.4 Upload_File element instance + Move_Uploaded_Files_To_Storage function

      This is similar to option 3, except that instead of using a WebSubmit function to build the interface, you use a regular WebSubmit response element. The advantage is that you can plug the WebSubmit element wherever you want on your submission page.

      Limitations:
      • Requires Javascript enabled user-side + support for the jQuery library (most "recent" browsers)

      To set up a file upload interface using this technique:

      1) Go to your submission page, and add an element: choose the "Upload_Files" response element. But wait! Read further before:

      2) You most probably want to customize the upload interface (set which types of files can be uploaded, how many, etc.). To do so, you would have to edit the code of the Upload_Files response element and change the parameters of the "create_file_upload_interface(..)" function. However this would affect all submissions using this element. The solution is to "clone" this element (by creating a new element: "Available elements"-> scroll down -> "Add New Element". Choose for e.g. name "DEMO_UploadFiles", Element Type-> "Response" and paste the code of the Upload_Files element in the "Element Description" field). Once done, add the "DEMO_UploadFiles" element to your page.

      3) Go to your submission functions. Add the Move_Uploaded_Files_to_Storage function, and configure it in the same way as in option 3, step 5.

      Revising/deleting files

      File revisions and deletions come for free with this technique. Simply allow deletion or revision of files when configuring the Upload_Files element of the MBI or SRV steps.

      5.5 FCKeditor element instance + Move_FCKeditor_Files_To_Storage function

      This technique relies on the popular HTML rich text editor "FCKeditor", which embeds an interface to upload files. As a consequence it only makes sense to use this technique in the cases where you want files to be uploaded as part of some HTML context. Typical use cases are submissions for the WebJournal module, for which you want to upload articles. The DEMOJRN submission is an example of submission using this technique.

      Limitations:
      • Requires Javascript enabled user-side + support for the FCKeditor (most "recent" browsers)
      • File revisions and deletions are not supported as such (must be done through other options).

      Setting up a submission to use the FCKeditor is really similar to the strategy described in option 4: the principle is to instantiate a custom "Response Element" that will call a function taking care of the interface, and then plug a WebSubmit function to take care of attaching the files.

      1) Go to your submission page, and add an element: choose the "DEMOJRN_ABSE" response element. But wait! Read further before:

      2) You will want and need to customize the behaviour of the FCKeditor, but you don't want to alter the behaviour of other submissions using this element. The solution is to "clone" this element (create a new element: "Available elements" -> scroll down -> "Add New Element"; choose for e.g. the name "DEMO_FCKEDITOR", Element Type -> "Response", and paste the code of the DEMOJRN_ABSE element in the "Element Description" field). Customize the element according to your needs. This will require some development skills and a good overview of your metadata and submission in order to have the editor correctly initialized. Additional information can be found in the FCKeditor Integration guide.

      3) Once done, add the "DEMO_FCKEDITOR" element to your page.

      4) Go to your submission functions. Add the Move_FCKeditor_Files_To_Storage function, and configure it so that the input_fields parameter lists the name(s) (comma-separated if there are several instances) given to the FCKeditor instance(s) created by the DEMO_FCKEDITOR response element.

      Revising/deleting files

      The way this editor is currently used does not let you delete/revise files right from the editor interface. To set up file deletion/revision, combine this technique with option 3 for example.

      5.6 Upload_Photo_interface element instance + Move_Photos_To_Storage function

      This interface is specifically dedicated to pictures: it enables the selection of a bunch of photos to upload, and lets you preview and comment them before submitting the record.

      Limitations:
      • Requires Javascript enabled user-side + support for the Flash plugin (version >= 9.0.24)
      • Support for deletions, but not revisions

      Setting up a submission to use this interface is really similar to the strategy described in option 4: the principle is to instantiate a custom "Response Element" that will call a function taking care of the interface, and then plug a WebSubmit function to take care of attaching the files.

      1) Go to your submission page, and add an element: choose the "Upload_Photos" response element. But wait! Read further before:

      2) As in other strategies that use a response element to lay out the interface, you might want to customize the behaviour of the photo uploader without altering the behaviour of other submissions using this element. If so (though it is not needed in the case of this interface), the solution is to "clone" this element (create a new element: "Available elements" -> scroll down -> "Add New Element"; choose for e.g. the name "DEMO_UPLOADPHOTO", Element Type -> "Response", and paste the code of the Upload_Photos element in the "Element Description" field). Customize the element according to your needs. This will require some development skills in order to have the interface correctly customized.

      3) Once done, add the "DEMO_UPLOADPHOTO" (or Upload_Photos if you kept the original file) element to your page.

      4) Go to your submission functions. Add the Move_Photos_To_Storage function, and configure it according to your needs.

      Revising/deleting files

      The interface lets user add or remove files, but cannot specifically revise a file. If needed, it can be combined with another strategy such as option 3.

      5.7 Alternatives: BibDocFile CLI or BibDocFile Web Interface

      These last techniques are not meant to be used in WebSubmit submissions, but are admin tools that can be used to manage files independently of any submission. They are described here for the sake of completeness.

      The BibDocFile command line interface is described in more detail in "How to manage your fulltext files through BibDocFile".

      The BibDocFile admin interface gives access to some of the functionalities offered by its command-line equivalent through a graphical web interface. Its interface is similar to the one offered by the Upload_Files element or the Create_Upload_Files_Interface function, but is not tied to a specific submission (and therefore won't automatically execute post-processing steps such as stamping).
      Access to the BibDocFile admin interface is restricted via the WebAccess runbibdocfile action.

      6. Access restrictions

      This section focuses on restricting access to the submissions themselves, not on restricting the produced content (records, files, etc.). Refer to the adequate documentation to restrict collections or files.

      6.1 Admin-level

      Access to the WebSubmit admin interface is controlled via the WebAccess cfgwebsubmit action.

      6.2 User-level

      Access to the submissions is controlled via the WebAccess submit action. The action has the following parameters:

      • doctype: the submission code (eg. DEMOART) for which you want to set restrictions.
      • act: the action (for eg. "SBI") for which you want to set the restriction. Can be * to mean any action for the given submission.
      • categ: the category (for eg. "Article", "Preprint") for which you want to set the restriction. Can be * to mean any category for the given submission.

      Connect for eg. a role to the submit action with parameters doctype=DEMOART, act=SBI, categ=* to let people of this role submit new documents in the DEMOART submission, in any category.

      If you do not add an authorization for a given submission doctype and action (even an empty role), the submission is open to anybody. For eg. in the above example, provided that an MBI action exists, even with a restricted SBI action anybody will be able to modify existing documents with MBI, unless the MBI action is also connected to a role. To make it short: a submission is not restricted until it is...

      Note that it is your responsibility as WebSubmit admin to ensure that your workflow is not modifying records outside the desired scope. Given that records are independent of the submission that created them, there is no mechanism in the WebSubmit engine that prevents the DEMOART submission from modifying records created with the DEMOBOOK submission. A check must be added at the level of the WebSubmit functions of your submission to make sure that the chosen submission and category indeed match the record to be modified (for eg. retrieved via the Get_Report_Number function).

      All the above checks also do not prevent an authorized user from modifying documents submitted by others. To enable finer-grained restrictions, use the WebSubmit function "Is_Original_Submitter" or "User_is_Record_Owner_or_Curator" in your MBI, SRV, etc. submission workflows (for eg. just after the "Get_Recid" function). Check also the "Restricting the submission" how-to from this guide.

      Terminology

      The document type of a file (doctype)

      The document type is an attribute of a file. It can be seen as a category which lets you organize your files: "Main" file, "Additional", "Figures", "source", whatever you need. It is not used much, except on /XXX/files/ pages to group files by category. It can however come in handy during file upload processes, to assign different kinds of restrictions based on the document type, or simply to make the configuration of the submission easier, depending on which technique you use to manage files.

      The submission directory (curdir)

      The WebSubmit workflow mainly splits into two parts: data gathering (user interface side, with WebSubmit pages and elements) and, as a second step, data integration (with WebSubmit functions involved, plus BibConvert templates). In the middle stands the submission directory (also called the "curdir"). Each submission session corresponds to a unique submission directory, which stores the values collected from the submission pages in the form of a series of textual files, one for each input field. These files are named after the submission WebSubmit elements, and their content is the value input by the submitter. Note that uploaded files are stored in a /files/ subdirectory.

      WebSubmit functions process the files in this directory: for example "Make_Record", which creates the MARCXML (through BibConvert templates), or "Stamp_Uploaded_Files", which will stamp the uploaded files in the /files/ directory. If you happen to write a customized WebSubmit response element that writes files to disk, or implement a WebSubmit function that must retrieve submitted values, you will certainly use the submission directory.

      These submission directories are also helpful to debug submissions, and can act as a backup when something goes wrong during a submission.

      An example of submission directory could be found at this path: /opt/invenio/var/data/submit/storage/running/DEMOART/1245135338_62620, where DEMOART is your submission code, and 1245135338_62620 is the submission session ID, as found at the bottom of each WebSubmit web page during the submission process. Just after the user has finished the submission, this directory contains all the collected values of the form. But the life of the submission directory does not stop there. Immediately after the user completes the submission, the WebSubmit functions are executed: for e.g. (depending on how you have configured your submission) creation of a report number (stored in the submission directory too!) (function Report_Number_Generation), creation of the MARCXML (usually named "recmysql", in the submission directory again!) (function Make_Record), upload of the MARCXML (function Insert_Record) and Move_To_Done. This last function moves the submission directory to a new place, for e.g. /opt/invenio/var/data/submit/storage/done/DEMOART/DEMO-ARTICLE-2010-001.tar.gz, assuming that the report number of the submitted record is DEMO-ARTICLE-2010-001. Some other functions will move the submission directory to other places, and some functions will even let you configure where to move it.

      - End of new WebSubmit admin guide -

      WARNING: OLD WEBSUBMIT ADMIN GUIDE FOLLOWS
      This WebSubmit Admin Guide was written for the previous PHP-based version of the admin tool. The submission concepts and pipeline description remain valid, but the interface snapshot examples would now differ. The guide is to be updated soon.

      Table of Contents

       

       

      General Overview of the Manager Tool

      Things to know before using the Manager:

         This manager tool allows you to administer the whole WebSubmit interface. With it, you will be able to create new actions, new types of documents and edit the existing ones.

         The main objects in webSubmit are the "action" (such as "Submit New Record", "Submit New File", "Modify Record"...) and the "type of document" (such as "preprint", "photo"...).

         Several actions can be attached to one given type of document. An action is the combination of two processes:
        • The first one is the data gathering. The manager will allow you to create new web forms corresponding to the fields the user will have to fill in when using webSubmit.
        • The second one is the data treatment: basically, what the program will do with the data gathered during the first phase. The treatment appears in this tool as a sequence of functions. This manager will allow you to add functions to an action, edit the existing functions, and reorder them.

      See also:

    • using the manager through an example
    • interface description
    • actions
    • document types

       

      Using the manager through an example

      what is this?

        This page presents the typical situations a user may meet when using WebSubmit and, for each situation, how to use the manager to configure it.

      The user reaches WebSubmit main page.

      [Screenshot: WebSubmit main page]

       To add a document type to WebSubmit, you should go to the main page and click on "New Doctype" in the left blue panel.

       Even once created, a document type will not appear automatically on this page. To configure the list of catalogues and document types displayed on this page, the administrator shall go to the edit catalogues page. (see the guide section)

      The user can then click on the document type he is interested in.

      [Screenshot: document type page]

       The text appearing under the header containing the name of the document can be configured by going to the main page, clicking on the title of the document type, then on the "Edit Document Types Details" button.

       You can associate several categories to a document type which can be defined by going to the main page, click on the title of the document type then on the "View Categories" button. The selected category will be saved in a file named "comboXXX" (where XXX is the short name of the document type) in the submission directory.

       To add an action button to this page, first implement this action by going to the main page, clicking on the title of the document type, then on the "Add a new submission" button. If the action is already implemented and the button still does not appear on the submission page, then you should edit the details of this implementation: go to the main page, click on the title of the document type, then on the icon in the "Edit Submission" column and in the line of the desired action. There you should set the "Displayed" form field to "YES".

       You can also change the order of the buttons, by going to the main page, click on the title of the document type then on the icon in the "Edit Submission" column and in the line of the desired action. There you can set the "buttonorder" form field.

      The user now may choose a category, then click on the action button he wishes.
      The submission starts, the first page of the web form appears.

      [Screenshot: document type page]

       This web form is composed of several pages; on each of these pages form fields can be found. To modify the number of pages, add or withdraw form fields and modify the texts before each form field, you shall go to the main page, click on the title of the document type, then on the icon in the "Edit Submission Pages" column and in the line of the desired action. (see the guide section)

      On the last page of the submission, there should be a button like in the following image which will trigger the end script

      [Screenshot: document type end page]

       This button is defined like any other form field. Its definition should include an onclick="finish();" javascript attribute.

       After clicking this button, WebSubmit will apply the end script functions to the gathered data. To modify the end script, you shall go to the main page, click on the title of the document type then on the icon in the "Edit Functions" column and in the line of the desired action. (see the guide section)

      See also:

      interface description
      actions
      document types

       

       

      Philosophy behind the document submission system

      This page will explain some philosophical issues behind the document submission system.

      On the relation between a search collection and a submission doctype:

         The relation between a search collection and a submission document type can be a source of confusion for Invenio administrators. This comes from the fact that there is no direct one-to-one mapping between them, as one might expect. The relation is more flexible than that.

         A search collection in Invenio is defined through a search query. For example, "all records where field F contains the value V belong to collection C". Several assertions can be deduced from this definition:
         1/ A single record can appear in several collections.
         2/ There is no limitation to the number of collections in which a record can appear.
         3/ Any query can be used to build a collection. The query can also be a complex one using logical operators, hence can rely on the value of several fields.

         (In addition, a search collection can be defined via a set of its subcollections in the hierarchy tree. Refer to the WebSearch Admin Guide for that matter.)

         The submission system basically creates an XML MARC record and stores it in the database. To which collection this new record belongs depends exclusively on the content of the XML MARC record. This XML MARC record is created by the Make_Record function. So the secret of the matching of a submitted record to a particular collection lies in the configuration of this function. Some examples will clarify this point:

         Example 1: Let's consider a "Preprints" collection which is defined by this query: "980__a:PREPRINT". We want to create a submission document type from which all records will go to this "Preprints" collection. For this, the Make_Record function should be configured so that a 980__a field containing "PREPRINT" will always be created.
         Example 2: Let's still consider the same "Preprints" collection, and an additional "Theses" collection based on a slightly different query "980__a:THESIS". We want to create a single submission type from which the records will go in the "Preprints" or "Theses" collections depending on a field chosen by the submitter. In this case, the Make_Record function should be configured so that a 980__a field will contain either "PREPRINT" or "THESIS" depending on the value entered by the submitter.
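
         As a hedged sketch, the decision that the BibConvert configuration of Example 2 encodes amounts to the following mapping (the helper name and the default value are illustrative assumptions, not part of WebSubmit):

         def collection_tag(submitted_doctype):
             # Map the submitter's choice to the 980__a value that routes
             # the record to the "Preprints" or "Theses" collection.
             mapping = {"preprint": "PREPRINT", "thesis": "THESIS"}
             # Defaulting to "PREPRINT" is an assumption of this sketch.
             return mapping.get(submitted_doctype.strip().lower(), "PREPRINT")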

         The apparent disconnection between a submission document type and a search collection gives great flexibility: administrators can create 1-to-1, 1-to-n, n-to-1 or even 1-to-0 (not very useful!) relations.

       

       

      Interface Description

      Welcome to webSubmit Management tool:

         On the webSubmit admin main page you will find:



        • The list of all existing document types in the middle of the page. Click on one line in the list to access the main document modification panel.
        • The right menu panel with the following links inside:
          • "webSubmit Admin": This link leads you back to the main page of the manager.
          • "New Doctype": Click here if you wish to create a new document type.
          • "Remove Doctype": Click here if you want to remove an existing document type.
          • "Available Actions": Lists all existing actions
          • "Available Javascript Checks": Lists all existing Javascript checking functions.
          • "Available Element Description": Lists all existing html form element descriptions.
          • "Available Functions": Lists all existing functions in CDS Submit.
          • "Organise Main Page": Allows you to manage the appearance and order of the list of document types on CDS Submit User main page.

      See also:

      interface description
      actions
      document types

       

       

      Document Types

         WebSubmit can propose several actions on different document types. Each of these document types may or may not implement all possible actions. The main difference between document types lies in the metadata which define them, and possibly in the kind of fulltext files attached to their records.

         A document type can be one of "Thesis", "Photos", "Videotapes"... or whatever type of document you may invent. A document type is always defined by its metadata. It may or may not have a fulltext file attached to it.

         This tool leaves you free to create the web forms adapted to whatever type of document you want to create (see "Create and Maintain the Web Form") as well as free to determine what treatment you wish to apply to the collected data (see "Create and Maintain the Data Treatment").

      See also:

      add a new type of document
      remove a type of document
      modify a type of document
      implement an action over a type of document

       

       

      Adding a New Document Type

      How to get there?

       Click on the "New Doctype" link in the webSubmit right menu.

      How to do this?

       A new document type is defined by 6 fields:
      • Creation Date and Modification Dates are generated and modified automatically.
      • Document Type ID: This is the acronym for your new document type. We usually use a 3-letter acronym.
      • Document Type Name: This is the full name of your new document. This is the text which will appear on the list of available documents and catalogues on webSubmit main page.
      • Document Type Description: This is the text which will appear on the document type submission page. This can be pure text or html.
      • Doctype to clone: Here you can choose to create your document type as a clone of another existing document type. If so, the new document type will implement all actions implemented by the chosen one. The web forms will be the same, and the functions also, as well as the values of the parameters for these functions. Of course once cloned, you will be able to modify the implemented actions.

      See also:

    • remove a type of document
    • modify a type of document
    • implement an action over a type of document

       

      Removing a Document Type

      How to get there?

       Click on the "Remove Doctype" link in the webSubmit admin right menu

      How to do this?

       Select the document type to delete, then click on the "Remove Doctype" button. Remember that by doing this you will delete the document type as well as all the implementations of actions for it!

      See also:

    • create a type of document
    • modify a type of document
    • implement an action over a type of document

       

      Modifying a Document Type

      What is it?

       Modifying a document type in webSubmit - this will modify its general data description, not the implementations of the actions on this document type. For the latter, please see implement an action over a type of document.

      How to get there?

       From the main page of the manager, click on the title of the document type you want to modify, then click on the "Edit Document Type Details" button.

      How to do this?

       Once here, you can modify 2 fields:
    • Document Type Name: This is the full name of your new document. This is the text which will appear on the list of available documents and catalogues on webSubmit main page.
    • Document Type Description: This is the text which will appear on the right of the screen when the user moves the mouse over the document type title and on the document type submission page. This can be pure text or html.

      See also:

    • remove a type of document
    • create a type of document
    • implement an action over a type of document

       

      Actions

       In webSubmit you can create several actions (for example "Submit New Record", "Submit a New File", "Send to a Distribution List", etc. - in fact any action you can imagine performing on a document stored in your database). The creation of an action is very simple: it consists of filling in a name and a description, and associating a directory with this action. The directory parameter indicates where the collected data will be stored while the action is carried out.

       Once an action is created, you have to implement it over a document type. Implementing an action means defining the web form which will be displayed to a user, and defining the treatment (set of functions) applied to the data which have been gathered. The implementation of the same action over two document types can be very different. The fields in the web form can be different as well as the functions applied at the end of this action.

      See also:

    • create a new action
    • remove an action
    • modify an action
    • implement an action over a type of document

       

      Adding a New Action

      How to get there?

       Click on the "Available Actions" link in the websubmit right menu, then on the "Add an Action" button.

      How to do this?

       A new action is defined by 6 fields:

      • Creation Date and Modification Dates are generated and modified automatically.
      • Action Code: This is the acronym for your new action. We usually use a 3-letter acronym.
      • Action Description: This is a short description of the new action.
      • dir: This is the name of the directory in which the submission data will be stored temporarily. If the dir value is "running", as for the "Submit New Record" action (SBI), then the submission data for a Text Document (document acronym "TEXT") will be stored in the /opt/invenio/var/data/submit/storage/running/TEXT/9089760_90540 directory (where 9089760_90540 is what we call the submission number: a string automatically generated at the beginning of each submission). Once finished, the submission data will be moved to the /opt/invenio/var/data/submit/storage/done/TEXT/ directory by the "Move_to_Done" function.
      • statustext: text displayed in the status bar of the browser when the user moves his mouse over the action button.

      See also:

    • remove an action
    • modify an action
    • implement an action over a type of document

       

      Removing an Action

      What is it?

       Removing the implementation of an action over a document type - Please note the removal of the action itself is not allowed with this tool.

      How to get there?

       From the websubmit admin main page, click on the title of the relevant document type. Then click on the red cross corresponding to the line of the action you want to remove.

      See also:

    • create an action
    • modify an action
    • implement an action over a type of document

       

      Modifying an Action

      What is it?

       This page is about how to modify the general data about an action - for modifying the implementation of an action over a document type, see implement an action over a type of document.

      How to get there?

       Click on the "View Actions" link in the right menu of the websubmit admin, then on the title of the action you want to modify...

      How to do this?

       You may modify 3 fields:
      • Action Description: This is a short description of the action.
      • dir: This is the name of the directory in which the submission data will be stored temporarily. See the meaning of this parameter in create an action.
      • statustext: text displayed in the status bar of the browser when the user moves his mouse over the action button.

      See also:

    • remove an action
    • create an action
    • implement an action over a type of document

       

      Implement an action over a document type

      What is it?

       Implement an action over a document type. Create the web forms and the treatment process.

      How to get there?

       From the main page of the manager, click on the title of the relevant document type.
      Then click on the "Add a New Submission" button.

      How to do this?

       Just select the name of the action you want to implement. When you select an action, the list of document types which already implement this action appears. You can then select from this list the document type from which you want to clone the implementation, or just choose "No Clone" if you want to build this implementation from scratch.

       After selecting the correct fields, click on the "Add Submission" button.

       You then go back to the document type manager page, where you can see that your newly implemented action appears in the bottom table (check the acronym in the first column).



      • Clicking on the action acronym will allow you to modify the general data about the action (remember in this case that all the other implementations of this particular action will also be changed).
      • The second column indicates whether the button representing this action will appear on the submission page.
      • The third column shows you the number of pages composing the web form for this implementation. (see create and maintain the web form).
      • The 4th and 5th columns indicate the creation and last modification dates for this implementation.
      • In the 6th column, you can find the order in which the button will be displayed on the submission page of this document type.
      • The following 4 columns (level, score, stpage, endtxt) deal with the insertion of this action in an action set.


        An action set is a succession of actions which should be done in a given order when a user starts a submission.
        For example the submission of a document is usually composed of two actions: Submission of Bibliographic Information (SBI) and Fulltext Transfer (FTT), which should be done one after the other.
        When the user starts the submission, we want CDS Submit to take him first to SBI and, when he finishes SBI, to carry him on to FTT.
        SBI and FTT are in this case in the same action set.
        They will both have a level of 1 ("level" is a bad name; it should be "action set number"), SBI will have a score of 1, and FTT a score of 2 (which means it will be started after SBI). If you set the stpage of FTT to 2, the user will be carried directly to the 2nd page of the FTT web form. This value is usually set to 1.
        The endtxt field contains the text which will be displayed to the user at the end of the first action (here it could be "you now have to transfer your files").

        A single action like "Modify Bibliographic Information" should have these 3 columns (level, score, stpage) set to 0, 0 and 1.
         


      • Click on the icon in the 12th column ("Edit Submission Pages") to create or edit the web form.
      • Click on the icon in the 13th column ("Edit Functions") to create or edit the function list.
      • The "Edit Submission" column allows you to modify the data (level, status text...) for this implementation.
      • Finally the last column allows you to delete this implementation.
         

       If you chose to clone the implementation from an existing one, the web form as well as the function list will already be defined. Otherwise, you will have to create them from scratch.

      See also:

    • create and maintain the web form
    • create and maintain the data treatment

       

      Create and maintain the web form

      What is it?

       Create and define the web form used during an action.

      How to get there?

       From the main page of the manager, click on the title of the relevant document type. Then click on the icon in the "Edit Submission Pages" column of the relevant line.

      List of the form pages

       A web form can be split over several pages. This is a matter of convenience for the user: he gets an overview of all the form fields present on a page without having to scroll it. Moreover, each time the user goes from one page to the next, all entered data are saved. If he wants to stop and come back later (or if the browser crashes!), he will be able to resume the submission at the exact point where he left it.

       Once here:



      you can see the ordered list of already existing pages in the web form. In this example there are 4 pages. You can then:
      • Move one page from one place to another, using the small blue arrows under each page number.
      • Suppress one page by clicking on the relevant red cross.
      • Add a page, by clicking the "ADD A PAGE" button!
      • Edit the content of one page by clicking on the page number.
      • Go back to the document main page.

      Edit one form page

       Click on a page number: you then arrive at a page where you can edit this form page.

       A form page is composed of a list of form elements. Each of these form elements is roughly made of an html template and a text displayed before the form field.

       In the first part of the page, you have a preview of what the form will look like to the user:


       Then the second table shows you the list of the form elements present on the page:


       You can then:
      • Move one element from one place to another using the drop-down menus in the first column ("Item No") of the table, or the little blue arrows in the second column.
      • Edit the html template of one form element by clicking on the name of the template in the 3rd column ("Name").
      • Edit one of the form elements by clicking on the icon in the 10th column.
      • Delete one form element by clicking on the relevant red cross.
      • Add an element to the page by clicking the "ADD ELEMENT TO PAGE" button.

      Edit the html template of one form element

       In the html template editing page, you can modify the following values:
      • Element type: indicates which html form element to create
      • Aleph code: Aleph users only! - This indicates in which field of the Aleph document database to retrieve the original value when modifying this information (function Create_Modify_Interface of action MBI).
      • Marc Code: MySQL users only! - This indicates in which field of the MySQL document database to retrieve the original value when modifying this information (function Create_Modify_Interface of action MBI).
      • Cookies: indicates whether WebSubmit will set a cookie on the value filled in by the user. If yes, next time the user will come to this submission, the value he has entered last time will be filled in automatically. Note: This feature has been REMOVED.
      • other fields: The other fields help define the html form element.
      Important warning! Please remember this is a template! This means it can be used in many different web forms/implementations. When you modify this template, the modification will take place in each of the implementations where this template is used.

      Edit one form element

       In the form element editing page, you may modify the following values:
      • element label: This is the text displayed before the actual form field.
      • level: can be one of "mandatory" or "optional". If mandatory, the user won't be able to leave this page before filling this field in.
      • short desc: This is the text displayed in the summary window when it is opened.
      • Check: Select here the javascript checking function to be applied to the submitted value of this field
      • Modify Text: This text will be displayed before the form field when modifying the value (action "Modify Record", function "Create_Modify_Interface")

      Add one form element

       Click on the "ADD ELEMENT TO PAGE" button. There you will have to decide which html template field to use ("Element Description code"), and also to set the fields mentioned above.

      Create a new html template

       You have access to the list of all existing html templates by clicking on the "View element descriptions" link in the websubmit admin right menu.
      By clicking on one of them, you will have access to its description.
      If no template corresponds to the one you seek, click on the "ADD NEW ELEMENT DESCRIPTION" button to create one.
       The fields you have to enter in the creation form are the ones described in the Edit the html template of one form element section.
      You also have to choose a name for this new element.
      IMPORTANT! The name you choose for your html element is also the name of the file in which webSubmit will save the value entered in this field. It is also the one you will use in your BibConvert configuration. BibConvert is the program which converts the data gathered in webSubmit into a formatted XML file for insertion in the documents database.
       Tips:
    • Elements of type "select box" which are used as a mandatory field in a form must start with "<option>Select:</option>"

      Create and edit a checking function

       Click on the "View Checks" link in the websubmit admin right menu. You then have access to a list of all the defined javascript functions.
      You can then click on the name of the function you want to modify, or click on the "ADD NEW CHECK" button to create a new javascript function.
      These functions are inserted into the web page when the user is doing his submission. When he clicks on "next page", each function is called with the value entered by the user as a parameter. If the function returns false, the page does not change and an error message should be output. If the function returns true, everything is correct, so the page can be changed.

      See also:

    • create and maintain the data treatment

       

      Setup the Data Treatment

      What is it?

       At the end of a submission, we have to tell webSubmit what to do with the data it has gathered. This is expressed through one or several lists of functions (we call this the "end script").

      How to get there?

       From the main page of the manager, click on the title of the relevant document type.
      Then click on the icon in the "Edit Functions" column of the relevant line.

      List of functions

       Here is what you may see then (this is the end script list of functions for a document type named "TEST" and action "FTT" - Fulltext Transfer):



       You can see the ordered list of all the functions in the end script. This end script is composed of 2 steps (see the "step" column). The functions composing the first step are called, then there must be an action from the user to trigger step 2 - in the present case the Upload_Files function (the last one of step 1) allows the user to upload additional files by creating a web form; when the user finishes, he presses another button created by the function, which triggers step 2. The functions of step 2 are then called.

       Why implement multiple steps? The reason varies with the task you want to accomplish. In the example above (Fulltext Transfer), we use the first step to allow the upload of multiple additional files (a dynamic action), which could not be done in a static web form. In the case of the "Modify Bibliographic Information" action, the first step is used to display the fields the user wants to modify, prefilled with the existing values. The reason is once again that the task we want to realise is dynamic.

       The "score" column is used to order the functions: the function with the smallest score is called first, and the one with the largest score is called last.
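
       As a rough sketch (the function names and step/score values below are assumed, for illustration only), the resulting execution order can be pictured like this:

       # (name, step, score) triples as configured in the end script
       functions = [("Upload_Files", 1, 10),
                    ("Insert_Record", 2, 20),
                    ("Move_to_Done", 2, 30)]
       # Functions run step by step; within a step, lowest score first.
       for name, step, score in sorted(functions, key=lambda f: (f[1], f[2])):
           print "step %s, score %s: %s" % (step, score, name)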

       You can then:
      • View and edit the parameters of each function by clicking on the name of the function.
      • Move one function up and down, by using the small blue arrows.
      • Suppress one function by clicking on the relevant red cross.
      • Add a function to the list by clicking the "ADD FUNCTION" button.
      • Go back to the document main page ("FINISHED" button).
       Please note: To move a function from one step to another, you have to delete it and then add it again in the proper step.

      The skeleton of your submission is now basically ready. You will need to add new pages to it, as well as insert post-processing functions; these steps are described in other sections of this guide. What you can do now is make the submission visible on the users' main submission page. To do so, click on "Organise Main Page" in the main menu, select your submission in the "Document Type Name" menu, choose from the next menu to which branch of the submission tree you want to attach this submission, and press "Add". Reorganise the tree as wanted from this interface.

    See also:

  • all about functions

     

    Functions

    Description:

     In webSubmit, each action process is divided into two phases: the gathering of data (through a web form) and the treatment of the data.

     The treatment is organised in a succession of functions, each of which has its own input and output.

     The functions themselves are stored in separate files (one per function) in the /opt/invenio/lib/python/invenio/websubmit_functions directory. A file containing a function MUST be named after the function name itself. For example, a function called "Move_to_Done" MUST be stored in a file called Move_to_Done.py. The case is important here.

     For a description of what should be inside the file, have a look at the "create a new function" page of this guide.

     To each function you can associate one or several parameters, which may have different values according to the document type the function is used for. One parameter may be used for different functions. For example one standard parameter used in several functions is called "edsrn". It contains the name of the file in which the reference of the document is stored.

    See also:

  • create a new function
  • delete a function
  • edit a function

     

    Creating a New Function

    How to get there?

     Click on the "Available Functions" link in the websubmit admin right menu. Then click on the "Add New Function" button.

    How to do this?

     Enter the name of the new function as well as a text description if you wish.
     You will then reach a page where you can add parameters to your new function.

     Don't forget to add the function file inside the /opt/invenio/lib/python/invenio/websubmit_functions directory and to name the file after the function. Functions must be written in Python. Here is an example implementation of a function:

    /opt/invenio/lib/python/invenio/websubmit_functions/Get_Report_Number.py:

     import os
     import re

     def Get_Report_Number(parameters, curdir, form):
         global rn
         # Path of the file containing the report number
         if os.path.exists("%s/%s" % (curdir, parameters['edsrn'])):
             fp = open("%s/%s" % (curdir, parameters['edsrn']), "r")
             rn = fp.read()
             fp.close()
             rn = rn.replace("/", "_")
             rn = re.sub("[\n\r ]+", "", rn)
         else:
             rn = ""
         return ""

    The function parameters are passed to the function through the parameters dictionary.
    The curdir parameter contains the current submission directory path.
    The form parameter contains the form passed to the current web page for possible reference from inside the function.
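
     As an illustration of this calling convention (the parameter value and the path below are assumed, echoing the examples used earlier in this guide):

     # Hypothetical invocation, roughly as WebSubmit itself would perform it:
     parameters = {'edsrn': 'DEMO_RN'}   # per-doctype parameter values
     curdir = '/opt/invenio/var/data/submit/storage/running/DEMOART/1245135338_62620'
     Get_Report_Number(parameters, curdir, form=None)   # form unused by this function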

    See also:

  • edit a function
  • delete a function

     

    Removing a Function

    Note

      There is currently no way of deleting a function through this interface. Use the direct MySQL command line interface for this.

    See also:

  • edit a function
  • create a function

     

    Editing a Function

    What is it?

     Edit a function, add parameters to it...

    How to get there?

     Click on the "Available Functions" link in the websubmit admin right menu.

    How to do this?

     On this page appears the list of all functions defined in the system. Two columns give you access to some features:
    • View function usage: Click here to access the list of all document types and all actions in which this function is used. Then, by clicking on one of the items, you will be given a chance to modify the parameter values for the given document type.
    • View/Edit function details: There you will be able to modify the function description, as well as add/withdraw parameters for this function.

    See also:

  • create a new function
  • delete a function

     

    All functions explained

    Description:

     This page lists and explains all the functions used in the demo provided with the Invenio package. This list is not exhaustive since you can add any new function you need.
     Click on one function name to get its description.
     Please note that in this page, when we refer to [param], we mean the value of the parameter 'param' for a given document type.

    CaseEDS
    Create_Modify_Interface
    Create_Recid
    Finish_Submission
    Get_Info
    Get_Recid
    Get_Report_Number
    Get_Sysno
    Get_TFU_Files
    Insert_Modify_Record
    Insert_Record
    Is_Original_Submitter
    Is_Referee
    Mail_Submitter
    Make_Modify_Record
    Make_Record
    Move_From_Pending
    Move_to_Done
    Move_to_Pending
    Print_Success
    Print_Success_APP
    Print_Success_MBI
    Print_Success_SRV
    Report_Number_Generation
    Send_Approval_Request
    Send_APP_Mail
    Send_Modify_Mail
    Send_SRV_Mail
    Test_Status
    Update_Approval_DB
    Upload_Files


    CaseEDS
    description
     This function may be used if the treatment to be done after a submission depends on a field entered by the user. Typically this is used in an approval interface: if the referee approves, we do one thing; if he rejects, we do another.
    More specifically, the function gets the value from the file named [casevariable] and compares it with the values stored in [casevalues]. If a value matches, the function directly goes to the corresponding step stored in [casesteps]. If no value is matched, it goes to step [casedefault].
    parameters
     casevariable This parameter contains the name of the file in which the function will find the chosen value.
     Eg: "decision"
     casevalues Contains the list of recognized values to match with the chosen value. Should be a comma-separated list of words.
     Eg: "approve,reject"
     casesteps Contains the list of steps corresponding to the values listed in [casevalues]. It should be a comma-separated list of numbers.
     Eg: "2,3"
     In this example, if the value stored in the file named "decision" is "approve", then the function launches step 2 of this action. If it is "reject", then step 3 is launched.
     casedefault Contains the step number to go to by default if no match is found.
     Eg: "4"
     In this example, if the value stored in the file named "decision" is neither "approve" nor "reject", then step 4 is launched.
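
     A minimal sketch of this dispatch (the helper name is ours; the real function reads the chosen value from the [casevariable] file in the submission directory):

     def case_step(chosen_value, casevalues, casesteps, casedefault):
         # Pair each recognized value with its target step number.
         values = casevalues.split(",")    # e.g. "approve,reject"
         steps = casesteps.split(",")      # e.g. "2,3"
         for value, step in zip(values, steps):
             if chosen_value == value:
                 return int(step)
         return int(casedefault)           # e.g. "4" when nothing matches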


    Create_Modify_Interface
    description
     To be used in the MBI-Modify Record action. It displays a web form allowing the user to modify the fields he chose. The fields are prefilled with the existing values extracted from the documents database. This function takes the values stored in the [fieldnameMBI] file. This file contains a list of field names separated by "+" (it is usually generated from a multiple select form field). The function then retrieves the corresponding tag names (MARC-21) stored in the element definitions. Finally it displays the web form and fills it with the existing values found in the documents database.
    parameters
    fieldnameMBI Contains the name of the file in which the function will find the list of fields the user wants to modify. Depends on the web form configuration.


    Create_Recid
    description
    This function retrieves a new record id from the records database. This record id will then be used to create the XML record afterwards, or to link with the fulltext files. The created id is stored in a file named "SN".
    parameters
    none


    Finish_Submission
    description
     This function stops the data treatment process even if further steps exist. This is used for example in the approval action. In the first step, the program determines whether the user approved or rejected the document (see the CaseEDS function description). Then, depending on the result, it executes step 2 or step 3. If it executes step 2, it would then normally continue with step 3 unless something stops it: the Finish_Submission function plays this stopping role.
    parameters
    none


    Get_Info
    description
     This function tries to retrieve, from the "pending" directory or directly from the documents database, some information about the document: title, original submitter's email and author(s).
    If found, this information is stored in 3 global variables: $emailvalue, $titlevalue, $authorvalue to be used in other functions.
    If not found, an error message is displayed.
    parameters
    authorFile Name of the file in which the author may be found if the document has not yet been integrated (in this case it is still in the "pending" directory).
    emailFile Name of the file in which the email of the original submitter may be found if the document has not yet been integrated (in this case it is still in the "pending" directory).
    titleFile Name of the file in which the title may be found if the document has not yet been integrated (in this case it is still in the "pending" directory).


    Get_Recid
    description
    This function searches for the document in the database and stores the recid of this document in the "SN" file and in a global variable "sysno".
    The function conducts the search based upon the document's report-number (and relies upon the global variable "rn") so the "Get_Report_Number" function should be called before this one.
    This function replaces the older function "Get_Sysno".
    parameters
    none
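
     A hedged sketch of the kind of lookup this function performs (the helper name is ours; the search_pattern call mirrors its use elsewhere in Invenio):

     from invenio.search_engine import search_pattern

     def find_recid_by_report_number(rn):
         # Exact-match search on the report-number field; a unique hit
         # is expected for a well-formed reference.
         hits = list(search_pattern(req=None, p=rn, f="reportnumber", m="e"))
         if len(hits) == 1:
             return hits[0]
         return None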


    Get_Report_Number
    description
    This function gets the value contained in the [edsrn] file and stores it in the reference global variable.
    parameters
    edsrn Name of the file which stores the reference.
    This value depends on the web form configuration you did. It should contain the name of the form element used for storing the reference of the document.


    Get_Sysno
    description
    This function searches for the document in the database and stores the system number of this document in the "SN" file and in a global variable.
    "Get_Report_Number" should be called before.
    Deprecated: Use Get_Recid instead.
    parameters
    none


    Insert_Modify_Record
    description
    This function gets the output of bibconvert and uploads it into the MySQL bibliographical database.
    parameters
    none


    Insert_Record
    description
     This function gets the output of BibConvert and uploads it into the MySQL bibliographical database.
    parameters
    none


    Is_Original_Submitter
    description
    If the authentication module (login) is active in webSubmit, this function compares the current login with the email of the original submitter. If it is the same (or if the current user has superuser rights), we go on. If it differs, an error message is issued.
    parameters
    none


    Is_Referee
    description
    This function checks whether the currently logged user is a referee for this document.
    parameters
    none


    Mail_Submitter
    description
     This function sends an email to the submitter to inform him that the document he has just submitted has been correctly received.
    parameters
    authorfile Name of the file containing the authors of the document
    titleFile Name of the file containing the title of the document
    emailFile Name of the file containing the email of the submitter of the document
    status Depending on the value of this parameter, the function adds an additional text to the email.
    This parameter can be one of:
    ADDED: The file has been integrated in the database.
    APPROVAL: The file has been sent for approval to a referee.
    or can stay empty.
    edsrn Name of the file containing the reference of the document
    newrnin Name of the file containing the 2nd reference of the document (if any)


    Make_Modify_Record
    description
    This function creates the record file formatted for a direct insertion in the documents database. It uses the BibConvert tool.
     The main difference between the Make_..._Record functions lies in their parameters.
     As its name says, this particular function should be used for the modification of a record (MBI - Modify Record action).
    parameters
    modifyTemplate Name of bibconvert's configuration file used for creating the mysql record.
    sourceTemplate Name of bibconvert's source file.


    Make_Record
    description
    This function creates the record file formatted for a direct insertion in the documents database. It uses the BibConvert tool.
     The main difference between the Make_..._Record functions lies in their parameters.
     As its name does not say :), this particular function should be used for the submission of a document.
    parameters
    createTemplate Name of bibconvert's configuration file used for creating the mysql record.
    sourceTemplate Name of bibconvert's source file.


    Move_From_Pending
    description
    This function retrieves the data of a submission which was temporarily stored in the "pending" directory (waiting for an approval for example), and moves it to the current action directory.
    parameters
    none


    Move_to_Done
    description
     This function moves the existing submission directory to the /opt/invenio/var/data/submit/storage/done directory. It then tars and gzips the directory.
    parameters
    none


    Move_to_Pending
    description
     This function moves the existing submission directory to the /opt/invenio/var/data/submit/storage/pending directory. It is used to temporarily store this data until it is approved or...
    parameters
    none


    Print_Success
    description
    This function simply displays a text on the screen, telling the user the submission went fine. To be used in the "Submit New Record" action.
    parameters
     status Depending on the value of this parameter, the function adds an additional text to the displayed message.
    This parameter can be one of:
    ADDED: The file has been integrated in the database.
    APPROVAL: The file has been sent for approval to a referee.
    or can stay empty.
    edsrn Name of the file containing the reference of the document
    newrnin Name of the file containing the 2nd reference of the document (if any)


    Print_Success_APP
    description
    This function simply displays a text on the screen, telling the referee his decision has been taken into account. To be used in the Approve (APP) action.
    parameters
    none


    Print_Success_MBI
    description
    This function simply displays a text on the screen, telling the user the modification went fine. To be used in the Modify Record (MBI) action.
    parameters
    none


    Print_Success_SRV
    description
    This function simply displays a text on the screen, telling the user the revision went fine. To be used in the Submit New File (SRV) action.
    parameters
    none


    Report_Number_Generation
    description
    This function is used to automatically generate a reference number.
     After generating the reference, the function saves it into the [edsrn] file and sets the global variable containing this reference.
    parameters
    autorngen If set to "Y": The reference number is generated.
    If set to "N": The reference number is read from a file ([newrnin])
    If set to "A": The reference number will be the access number of the submission.
    counterpath indicates the file in which the program will find the counter for this reference generation.
     The value of this parameter may contain one of:
     "<PA>categ</PA>": in this case this string is replaced with the content of the file [rnin]
     "<PA>yy</PA>": in this case this string is replaced by the current year (4 digits) if [yeargen] is set to "AUTO", or by the content of the [yeargen] file in any other case (this content should be formatted as a date (dd/mm/yyyy)).
    "<PA>file:name_of_file</PA>": in this case, this string is replaced by the first line of the given file
    "<PA>file*:name_of_file</PA>": in this case, this string is replaced by all the lines of the given file, separated by a dash ('-') character.
    rnformat This is the format used by the program to create the reference. The program computes the value of the parameter and appends a "-" followed by the current value of the counter increased by 1.
     The value of this parameter may contain one of:
     "<PA>categ</PA>": in this case this string is replaced with the content of the file [rnin]
     "<PA>yy</PA>": in this case this string is replaced by the current year (4 digits) if [yeargen] is set to "AUTO", or by the content of the [yeargen] file in any other case (this content should be formatted as a date (dd/mm/yyyy)).
    "<PA>file:name_of_file</PA>": in this case, this string is replaced by the first line of the given file
    "<PA>file*:name_of_file</PA>": in this case, this string is replaced by all the lines of the given file, separated by a dash ('-') character.
     rnin This parameter contains the name of the file in which the program will find the category if needed. The content of this file will then replace the string <PA>categ</PA> in the reference format or in the counter path.
     yeargen This parameter can be one of:
     "AUTO": in this case the program takes the current 4-digit year.
     "<filename>": in this case the program extracts the year from the file whose name is <filename>. This file should contain a date (dd/mm/yyyy).
    edsrn Name of the file in which the created reference will be stored.
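
     To make the counter/format interplay concrete, here is a hedged sketch (the helper name is ours; the real function also handles the file-based placeholders and the counter storage on disk):

     import time

     def compose_reference(rnformat, categ, counter_value):
         # Substitute the <PA> placeholders, then append "-" followed by
         # the counter increased by 1, as described above.
         computed = rnformat.replace("<PA>categ</PA>", categ)
         computed = computed.replace("<PA>yy</PA>", time.strftime("%Y"))
         return "%s-%s" % (computed, counter_value + 1)

     # compose_reference("DEMO-<PA>categ</PA>-<PA>yy</PA>", "ARTICLE", 0)
     # yields e.g. "DEMO-ARTICLE-2011-1" (the year is taken from the clock)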


    Send_Approval_Request
    description
    This function sends an email to the referee in order to start the simple approval process.
    This function is very CERN-specific and should be changed in case of external use.
    Must be called after the Get_Report_Number function.
    parameters
     addressesDAM email addresses of the people who will receive this email (comma-separated list). This parameter may contain the <CATEG> string, in which case the variable computed from the [categformatDAM] parameter replaces this string.
    eg.: "<CATEG>-email@cern.ch"
    categformatDAM contains a regular expression used to compute the category of the document given the reference of the document.
     eg.: if [categformatDAM]="TEST-<CATEG>-.*" and the reference of the document is "TEST-CATEGORY1-2001-001", then the computed category equals "CATEGORY1"
    authorfile name of the file in which the authors are stored
    titlefile name of the file in which the title is stored.
    directory parameter used to create the URL to access the files.


    Send_APP_Mail
    description
    Sends an email to warn people that a document has been approved.
    parameters
     addressesAPP email addresses of the people who will receive this email (comma-separated list). This parameter may contain the <CATEG> string, in which case the variable computed from the [categformatAPP] parameter replaces this string.
    eg.: "<CATEG>-email@cern.ch"
    categformatAPP contains a regular expression used to compute the category of the document given the reference of the document.
     eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference of the document is "TEST-CATEGORY1-2001-001", then the computed category equals "CATEGORY1"
    newrnin Name of the file containing the 2nd reference of the approved document (if any).
    edsrn Name of the file containing the reference of the approved document.


    Send_Modify_Mail
    description
     This function sends an email to warn people that a document has been modified, and to inform the user that his modifications have been taken into account.
    parameters
    addressesMBI email addresses of the people who will receive this email (comma separated list).
    fieldnameMBI name of the file containing the modified fields.
    sourceDoc Long name for the type of document. This name will be displayed in the mail.
    emailfile name of the file in which the email of the modifier will be found.


    Send_SRV_Mail
    description
    This function sends an email to warn people a revision has been carried out.
    parameters
    notefile name of the file in which the note can be found
    emailfile name of the file containing the submitter's email
     addressesSRV email addresses of the people who will receive this email (comma-separated list). This parameter may contain the <CATEG> string, in which case the variable computed from the [categformatDAM] parameter replaces this string.
    eg.: "<CATEG>-email@cern.ch"
    categformatDAM contains a regular expression used to compute the category of the document given the reference of the document.
     eg.: if [categformatDAM]="TEST-<CATEG>-.*" and the reference of the document is "TEST-CATEGORY1-2001-001", then the computed category equals "CATEGORY1"


    Test_Status
    description
     This function checks whether the considered document has been requested for approval and is still waiting for approval. It also checks whether the password stored in the file "password" of the submission directory corresponds to the password associated with the document.
    parameters
    none


    Update_Approval_DB
    description
    This function updates the approval database when a document has just been approved or rejected. It uses the [categformatDAM] parameter to compute the category of the document.
    Must be called after the Get_Report_Number function.
    parameters
    categformatDAM It contains the regular expression which allows the retrieval of the category from the reference number.
    Eg: if [categformatDAM]="TEST-<CATEG>-.*" and the reference is "TEST-CATEG1-2001-001" then the category will be recognized as "CATEG1".
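
     One way to realise the [categformatDAM] substitution, as a hedged sketch (the helper name is ours; a non-greedy group stands in for <CATEG>):

     import re

     def compute_category(categformat, reference):
         # "TEST-<CATEG>-.*" becomes "TEST-(.*?)-.*"; matched against
         # "TEST-CATEG1-2001-001" the group captures "CATEG1".
         pattern = categformat.replace("<CATEG>", "(.*?)")
         match = re.match(pattern, reference)
         if match:
             return match.group(1)
         return ""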


    Upload_Files
    description
     This function displays the list of already transferred files (the main one and the additional ones), and also outputs an html form for uploading other files (pictures or fulltexts).
     parameters
     maxsize Maximum allowed size for the transferred files (size in bytes)
     minsize Minimum allowed size for the transferred files (size in bytes)
     iconsize In case the transferred files are pictures (jpg, gif or pdf), the function will automatically try to create icons from them. This parameter indicates the size in pixels of the created icons.
     type This can be one of "fulltext" or "picture". If the type is set to "picture", then the function will try to create icons (using ImageMagick's "convert" tool).

    See also:

  • create a new function
  • delete a function
  • edit a function

     

    Protection and Restriction

    Description:

     In webSubmit, you can restrict the use of some actions on a given document type to a list of users. You can use the webAccess manager for this.

     Let's say you want to restrict the submission of new TEXT documents to a given user. You should then create a role in webAccess which will authorize the action "submit" over doctype "TEXT" and act "SBI" (Submit new record). You can call this role "submitter_TEXT_SBI" for example. Then link the role to the proper users.
     Another example: if you wish to authorize a user to modify the bibliographic data of PICT documents, you have to create a role which authorizes the action "submit" over doctype "PICT" and act "MBI". This role can be called "submitter_PICT_MBI" or whatever you want.

     If no role is defined for a given action and a given document type, then all users will be allowed to use it.
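
     As a hedged sketch, such a check can be expressed with WebAccess's acc_authorize_action (the keyword argument names follow the 'submit' action's convention but are assumptions here; user_info is the dictionary WebSubmit passes to its functions):

     from invenio.access_control_engine import acc_authorize_action

     # Would the current user be allowed to run act "SBI" of doctype "TEXT"?
     (auth_code, auth_message) = acc_authorize_action(user_info, 'submit',
                                                      doctype='TEXT', act='SBI')
     if auth_code != 0:
         print auth_message   # not among the authorized submitters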

     

     

    Submission Catalogue Organisation

    What is it?

      This feature allows you to organise the way the webSubmit main page will look. You will be able to group document types inside catalogues and order the catalogues the way you wish.

    How to get there?

     Click on the "Organisation" link in the websubmit admin right menu.

    How to do this?

     Once on the "Edit Catalogues page", you will find the currently defined organisation chart in the middle of the page. To the right, one form allows you to create a new catalogue ("Add a Catalogue") and one to add a document type to an existing catalogue ("Add a document type").
     
    • To add a catalogue: Enter the name of your new catalogue in the "Catalogue Name" free text field, then choose the existing catalogue to which this one will be attached. By attaching the new one to an already existing catalogue, you can create a sub-catalogue. To actually create it, click on "ADD".
    • To add a document type to a catalogue: Choose in the list of existing "Document type names" the one you want to add to the chart. Then choose to which catalogue the document type will be associated. Click on "ADD" to finalise this action.
    • To withdraw a document type or a catalogue from the chart: Click on the red cross next to the item you want to withdraw. If you withdraw a catalogue all document types attached to it will be withdrawn also (of course the actual document types in webSubmit won't be destroyed!).
    • To move a document type or a catalogue in the chart: Use the small up and down arrows next to the document type/catalogue title.

    See also:

  • Create a New Document Type
  • document types

     

    BibConvert

    What is it?

     WebSubmit stores the data gathered during a submission in a directory. In this directory each file corresponds to a field saved during the submission.
     BibConvert is used to create, from this directory, a formatted file which is easy to upload into the bibliographical database.
     This BibConvert program is called from the Make_Record and Make_Modify_Record functions from the end script system of webSubmit.
     The BibConvert configuration files used by webSubmit are in the /bibconvert/config directory.

     For more info about bibconvert, please see the dedicated guide.

     

     

    FAQ

     Q1. I'd like to be warned each time there is an error, or an important action is made through the manager. Is this possible?
     Q2. Where are all the files stored in this system?
     Q3. How is the documents archive organised?



     Q1. I'd like to be warned each time there is an error, or an important action is made through the manager. Is this possible?
    Yes, it is. Edit the "CFG_SITE_ADMIN_EMAIL" definition in the invenio-local.conf file and set it to your email address. You will then receive all the warning emails issued by the manager.
     Q2. Where are all the files stored in this system?
  • the counter files are here: /opt/invenio/var/data/submit/counters. They are used by the Report_Number_Generation function.
  • all running and completed submissions are stored here: /opt/invenio/var/data/submit/storage.
  • all the document files attached to records are stored here: /opt/invenio/var/data/files.
  • all python functions used by webSubmit are stored here: /opt/invenio/lib/python/invenio/websubmit_functions

     Q3. How is the documents archive organised?
    First of all, the documents files attached to records are stored here: /opt/invenio/var/data/files.

    The Upload_Files webSubmit function is used to link a document with a record.

    All documents get an id from the system and are stored in the "bibdoc" table in the database. The link between a document and a record is stored using the "bibdoc_bibrec" table.

    The document id is used to determine where the files are stored. For example the files of document #14 will be stored here: /opt/invenio/var/data/files/g0/14

     The subdirectory g0 is used to split the documents across the filesystem. The CFG_FILE_DIR_SIZE variable from invenio.conf determines how many documents will be stored under one subdirectory.
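
     As a hedged illustration of that layout (the helper name and the exact bucketing rule are assumptions of this sketch; the authoritative logic lives in the bibdocfile module):

     def storage_path(docid, file_dir_size, base="/opt/invenio/var/data/files"):
         # Documents are grouped into g0, g1, ... buckets holding
         # file_dir_size documents each.
         group = docid // file_dir_size
         return "%s/g%d/%d" % (base, group, docid)

     # storage_path(14, 5000) -> "/opt/invenio/var/data/files/g0/14"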

    Several files may be stored under the same document directory: they are the different formats and versions of the same document. Versions are indicated by a string of the form ";1.0" concatenated to the name of the file.

    Please see the HOWTO Manage Fulltext Files for more information on the administrative command line tools available to manipulate fulltext files.

    See also:

    notes
    diff --git a/modules/websubmit/lib/functions/Mail_Approval_Request_to_Committee_Chair.py b/modules/websubmit/lib/functions/Mail_Approval_Request_to_Committee_Chair.py index 38bb3a7d0..b31801cfb 100644 --- a/modules/websubmit/lib/functions/Mail_Approval_Request_to_Committee_Chair.py +++ b/modules/websubmit/lib/functions/Mail_Approval_Request_to_Committee_Chair.py @@ -1,163 +1,164 @@ ## $id: Mail_Approval_Request_to_Committee_Chair.py,v 0.01 2008/07/25 18:33:44 tibor Exp $ ## This file is part of Invenio. ## Copyright (C) 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" ## ## Name: Mail_Approval_Request_to_Committee_Chair.py ## Description: function Mail_Approval_Request_to_Committee_Chair.py ## This function sends a confirmation email to the Committee Chair ## when approval for a document is requested. ## Author: T.Baron (first); C.Parker ## ## PARAMETERS: authorfile: name of the file containing the author ## titleFile: name of the file containing the title ## emailFile: name of the file containing the email ## status: one of "ADDED" (the document has been integrated ## into the database) or "APPROVAL" (an email has ## been sent to a referee - simple approval) ## edsrn: name of the file containing the reference ## newrnin: name of the file containing the 2nd reference ## (if any) ## from invenio.config import CFG_SITE_NAME, \ CFG_SITE_URL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_RECORD from invenio.mailutils import send_email from invenio.access_control_admin import acc_get_role_id, acc_get_role_users -from invenio.search_engine import search_pattern, get_fieldvalues +from invenio.search_engine import search_pattern +from invenio.search_engine_utils import get_fieldvalues from invenio.dbquery import run_sql #Copied from publiline def get_brief_doc_details_from_repository(reportnumber): """Try to get some brief details about the submission that is awaiting the referee's decision. Details sought are: title + Authors + recid (why?) + report-number (why?) This function searches in the Invenio repository, based on "reportnumber" for a record and then pulls the interesting fields from it. @param reportnumber: (string) - the report number of the item for which details are to be recovered. It is used in the search. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found a NoneType is returned. 
""" ## Details of the pending document, as found in the repository: pending_doc_details = None ## Search for records matching this "report number" found_record_ids = list(search_pattern(req=None, \ p=reportnumber, \ f="reportnumber", \ m="e")) ## How many records were found? if len(found_record_ids) == 1: ## Found only 1 record. Get the fields of interest: pending_doc_details = { 'title' : '-', 'recid' : '', 'report-number' : '', 'authors' : [], } recid = found_record_ids[0] ## Authors: first_author = get_fieldvalues(recid, "100__a") for author in first_author: pending_doc_details['authors'].append(author) other_authors = get_fieldvalues(recid, "700__a") for author in other_authors: pending_doc_details['authors'].append(author) ## Title: title = get_fieldvalues(recid, "245__a") if len(title) > 0: pending_doc_details['title'] = title[0] else: ## There was no value for title - check for an alternative title: alt_title = get_fieldvalues(recid, "2641_a") if len(alt_title) > 0: pending_doc_details['title'] = alt_title[0] ## Record ID: pending_doc_details['recid'] = recid ## Report Number: reptnum = get_fieldvalues(recid, "037__a") if len(reptnum) > 0: pending_doc_details['report-number'] = reptnum[0] elif len(found_record_ids) > 1: ## Oops. This is unexpected - there shouldn't be me multiple matches ## for this item. The old "getInAlice" function would have simply ## taken the first record in the list. That's not very nice though. ## Some kind of warning or error should be raised here. FIXME. pass return pending_doc_details def Mail_Approval_Request_to_Committee_Chair(parameters, curdir, form, user_info=None): """ This function sends a confirmation email to the Committee Chair when approval for a document is requested. """ FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL) # retrieve useful information from webSubmit configuration res = run_sql("select * from sbmCPLXAPPROVAL where rn=%s", (rn, )) categ = res[0][1] pubcomchair_address = "" # Try to retrieve the committee chair's email from the referee's database for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (res[0][0],categ))): pubcomchair_address += user[1] #Get the document details from the repository - use the function in publiline.py item_details = get_brief_doc_details_from_repository(rn) #Generate the author list authors = "" for element in item_details['authors']: authors += element + ", " message = """ The document %s has been published as a Communication. Please select an appropriate referee for this document. Title: %s Author(s): %s To access the document(s), select the file(s) from the location: <%s/%s/%s> To select a referee, please go to: <%s/publiline.py?flow=cplx&doctype=%s&categ=%s&apptype=%s&RN=%s&ln=en> --------------------------------------------- Best regards. The submission team.""" % (rn,item_details['title'],authors,CFG_SITE_URL,CFG_SITE_RECORD,sysno,CFG_SITE_URL,res[0][0],res[0][1],res[0][3],rn) # send the mail send_email(FROMADDR,pubcomchair_address,"Request for Referee Selection : Document %s" % rn, message,footer="") return "" diff --git a/modules/websubmit/lib/functions/User_is_Record_Owner_or_Curator.py b/modules/websubmit/lib/functions/User_is_Record_Owner_or_Curator.py index d7737c47e..96f987dc3 100644 --- a/modules/websubmit/lib/functions/User_is_Record_Owner_or_Curator.py +++ b/modules/websubmit/lib/functions/User_is_Record_Owner_or_Curator.py @@ -1,228 +1,228 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Test whether the user is an owner or curator of a record and based on this, either prevent them from working with it, or exit silently allowing processing on the submission to continue. """ __revision__ = "$Id$" import os from invenio.config import CFG_CERN_SITE -from invenio.search_engine import get_fieldvalues +from invenio.search_engine_utils import get_fieldvalues from invenio.websubmit_config import InvenioWebSubmitFunctionStop from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_get_role_id, acc_is_user_in_role ## The field in which to search for the record submitter/owner's email address: if CFG_CERN_SITE: ## This is a CERN site - we use 859__f for submitter/record owner's email: CFG_WEBSUBMIT_RECORD_OWNER_EMAIL = "859__f" else: ## Non-CERN site. Use 8560_f for submitter/record owner's email: CFG_WEBSUBMIT_RECORD_OWNER_EMAIL = "8560_f" CFG_MSG_USER_NOT_AUTHORIZED = """ """ def User_is_Record_Owner_or_Curator(parameters, curdir, form, user_info=None): """ Check that the user is either the original submitter, or that they belong to the role(s) given as parameter. This enables collaborative editing of records, so that collections can be curated by a group of people in addition to the original submitter. If the user has permission, the function ends silently. If not, it will raise an InvenioWebSubmitFunctionStop, informing the user that they don't have rights and sending them back to the submission web form. This function makes it unnecessary to protect the submission with WebAccess (i.e. 'submit' action): the function can check authorizations by itself. However, in case the action in which this function is used is still protected with WebAccess (eg. an authorization exists for the 'submit' action, in 'MBI'), ALL the possible submitters AND the curator groups must be linked to the authorization in order for WebSubmit to let users reach this function: this function then ensures that only curators or submitters of the record will be able to continue further. A record owner must have her email in the record metadata. A record curator must be in the role given as parameter to this function. WARNING: you must remember that category-based restrictions require you to check that the selected category matches the document to modify: one can select category 'foo' to modify a document submitted in category 'bar', given that submissions are independent of the record they create. WARNING: for backward compatibility reasons, if no role is given as parameter, the function simply checks against the WebAccess 'submit' action, with this submission's parameters. This means that anybody connected to the authorization will be able to modify ANY of the records this submission can handle.
@parameters: - curator_role: a role or mapping of roles that determines whether the user is a curator or not. The parameter can simply be the name of a WebAccess role. For eg: curator_photo where 'curator_photo' is a WebAccess role matching curator users for this submission. The parameter can also map the submission categories to different roles, so that different curator groups can be defined. For eg: ARTICLE=curator_art|REPORT=curator_rep|*=curator_gen (syntax: '|' to split mappings, and '=' to map category->role) This specifies that role 'curator_art' is used when category 'Article' is selected (code for this category is 'ARTICLE'), 'curator_rep' when 'Report' ('REPORT' code) is selected, and curator_gen in all other cases. * matches all categories. When defining a mapping category->role, and category cannot be retrieved (for eg. with /submit/direct URLs that do not specify category), only the * rule/role is matched. Eg: foo=role1|*=role2 matches role2 only When no role is defined or matched, the curator role is checked against the WebAccess 'submit' action, for current WebSubmit doctype, action and category. - curator_flag: the name of a file in which '1' is written if the current submitter is a curator. Otherwise, '0' is written. If no value is given, no file is written. @return: Empty string. @Exceptions raised: InvenioWebSubmitFunctionStop when user is denied permission to work with the record. """ global sysno # Get current doctype doctype_fd = open(os.path.join(curdir, 'doctype')) doctype = doctype_fd.read() doctype_fd.close() # Get current action act_fd = open(os.path.join(curdir, 'act')) act = act_fd.read() act_fd.close() # Get category. This one might not exist category = None if os.path.exists(os.path.join(curdir, 'combo%s' % doctype)): category_fd = open(os.path.join(curdir, 'combo%s' % doctype)) category = category_fd.read() category_fd.close() # Get role to belong to in order to be curator. If not specified, # we simply check against 'submit' WebAccess action for the current # WebSubmit action (for eg. 'MBI') curator_roles = [] try: curator_role = parameters['curator_role'] except: curator_role = '' if '=' in curator_role: # Admin specified a different role for each category. # For eg: general=curator_gen|photo=curator_photo|*=curator_other curator_roles = [categ_and_role.split('=', 1)[1].strip() \ for categ_and_role in curator_role.split('|') if \ len(categ_and_role.split('=', 1)) == 2 and \ categ_and_role.split('=', 1)[0].strip() in (category, '*')] elif curator_role: curator_roles = [curator_role] ## Get the current user's e-mail address: user_email = user_info["email"].lower() ## Now get the email address(es) of the record submitter(s)/owner(s) from ## the record itself: record_owners_list = [email.lower().strip() for email in \ get_fieldvalues(sysno, CFG_WEBSUBMIT_RECORD_OWNER_EMAIL)] ## Now determine whether this user is listed in the record as an "owner" ## (or submitter): user_has_permission = False user_msg = "" if user_email not in ("", "guest") and user_email in record_owners_list: ## This user's email address is listed in the record.
She should ## be allowed to work with it: user_has_permission = True # Check if user is curator is_curator = False if curator_roles: # Check against roles for role in curator_roles: if not acc_get_role_id(role): # Role is not defined continue if acc_is_user_in_role(user_info, acc_get_role_id(role)): # One matching role found user_has_permission = True is_curator = True break else: # Check against authorization for 'submit' (for backward compatibility) (auth_code, dummy) = acc_authorize_action(user_info, \ "submit", \ verbose=0, \ doctype=doctype, \ act=act) if auth_code == 0: ## The user is a curator for this ## submission/collection. Do not prevent access. is_curator = True user_has_permission = True try: curator_flag = parameters['curator_flag'] if curator_flag: flag_fd = open(os.path.join(curdir, curator_flag), 'w') flag_fd.write(is_curator and '1' or '0') flag_fd.close() except: pass ## Finally, if the user still doesn't have permission to work with this ## record, raise an InvenioWebSubmitFunctionStop exception sending the ## user back to the form. if not user_has_permission: raise InvenioWebSubmitFunctionStop(CFG_MSG_USER_NOT_AUTHORIZED) return "" diff --git a/modules/websubmit/web/publiline.py b/modules/websubmit/web/publiline.py index 70ca166fb..cd9fb2c62 100644 --- a/modules/websubmit/web/publiline.py +++ b/modules/websubmit/web/publiline.py @@ -1,1905 +1,1906 @@ ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ publiline_complex.py -- implements ... actors in this process are: 1. author -- submits ... 2. editor 3. referee We must not forget to define the roles...
""" __revision__ = "$Id$" ## import interesting modules: import os import re from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_URL, \ CFG_PYLIBDIR, \ CFG_WEBSUBMIT_STORAGEDIR, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_SITE_RECORD from invenio.dbquery import run_sql, Error, OperationalError from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_get_role_users, acc_get_role_id from invenio.webpage import page, create_error_box from invenio.webuser import getUid, get_email, page_not_authorized, collect_user_info from invenio.messages import gettext_set_language, wash_language #from invenio.websubmit_config import * -from invenio.search_engine import search_pattern, get_fieldvalues,check_user_can_view_record +from invenio.search_engine import search_pattern, check_user_can_view_record +from invenio.search_engine_utils import get_fieldvalues from invenio.websubmit_functions.Retrieve_Data import Get_Field from invenio.mailutils import send_email from invenio.urlutils import wash_url_argument from invenio.webgroup_dblayer import get_group_infos, insert_new_group, insert_new_member, delete_member from invenio.webaccessadmin_lib import cleanstring_email from invenio.access_control_config import MAXSELECTUSERS from invenio.access_control_admin import acc_get_user_email from invenio.access_control_engine import acc_get_authorized_emails from invenio.webmessage import perform_request_send import invenio.webbasket_dblayer as basketdb from invenio.webbasket_config import CFG_WEBBASKET_SHARE_LEVELS, CFG_WEBBASKET_CATEGORIES, CFG_WEBBASKET_SHARE_LEVELS_ORDERED from invenio.errorlib import register_exception from invenio.bibrecord import create_records, record_get_field_value, record_get_field_values execfile("%s/invenio/websubmit_functions/Retrieve_Data.py" % CFG_PYLIBDIR) import invenio.template websubmit_templates = invenio.template.load('websubmit') CFG_WEBSUBMIT_PENDING_DIR = "%s/pending" % CFG_WEBSUBMIT_STORAGEDIR CFG_WEBSUBMIT_DUMMY_MARC_XML_REC = "dummy_marcxml_rec" CFG_WEBSUBMIT_MARC_XML_REC = "recmysql" def perform_request_save_comment(*args, **kwargs): """ FIXME: this function is a dummy workaround for the obsoleted function calls below. Should get deleted at the same time as them. 
""" return def index(req,c=CFG_SITE_NAME,ln=CFG_SITE_LANG,doctype="",categ="",RN="",send="",flow="",apptype="", action="", email_user_pattern="", id_user="", id_user_remove="", validate="", id_user_val="", msg_subject="", msg_body="", reply="", commentId=""): ln = wash_language(ln) categ = wash_url_argument(categ, 'str') RN = wash_url_argument(RN, 'str') send = wash_url_argument(send, 'str') flow = wash_url_argument(flow, 'str') apptype = wash_url_argument(apptype, 'str') action = wash_url_argument(action, 'str') email_user_pattern = wash_url_argument(email_user_pattern, 'str') id_user = wash_url_argument(id_user, 'int') id_user_remove = wash_url_argument(id_user_remove, 'int') validate = wash_url_argument(validate, 'str') id_user_val = wash_url_argument(id_user_val, 'int') msg_subject = wash_url_argument(msg_subject, 'str') msg_body = wash_url_argument(msg_body, 'str') reply = wash_url_argument(reply, 'str') commentId = wash_url_argument(commentId, 'str') # load the right message language _ = gettext_set_language(ln) t="" # get user ID: try: uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../publiline.py/index", navmenuid='yourapprovals') uid_email = get_email(uid) except Error, e: return errorMsg(str(e),req, ln = ln) if flow == "cplx": if doctype == "": t = selectCplxDoctype(ln) elif (categ == "") or (apptype == ""): t = selectCplxCateg(doctype, ln) elif RN == "": t = selectCplxDocument(doctype, categ, apptype, ln) elif action == "": t = __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln) else: t = __doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, reply, commentId, ln) return page(title=_("Document Approval Workflow"), navtrail= """%(account)s""" % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'account' : _("Your Account"), }, body=t, description="", keywords="", uid=uid, language=ln, req=req, navmenuid='yourapprovals') else: if doctype == "": t = selectDoctype(ln) elif categ == "": t = selectCateg(doctype, ln) elif RN == "": t = selectDocument(doctype, categ, ln) else: t = __displayDocument(req, doctype, categ, RN, send, ln) return page(title=_("Approval and Refereeing Workflow"), navtrail= """%(account)s""" % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'account' : _("Your Account"), }, body=t, description="", keywords="", uid=uid, language=ln, req=req, navmenuid='yourapprovals') def selectDoctype(ln = CFG_SITE_LANG): res = run_sql("select DISTINCT doctype from sbmAPPROVAL") docs = [] for row in res: res2 = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],)) docs.append({ 'doctype' : row[0], 'docname' : res2[0][0], }) t = websubmit_templates.tmpl_publiline_selectdoctype( ln = ln, docs = docs, ) return t def selectCplxDoctype(ln = CFG_SITE_LANG): res = run_sql("select DISTINCT doctype from sbmCPLXAPPROVAL") docs = [] for row in res: res2 = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],)) docs.append({ 'doctype' : row[0], 'docname' : res2[0][0], }) t = websubmit_templates.tmpl_publiline_selectcplxdoctype( ln = ln, docs = docs, ) return t def selectCateg(doctype, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s",(doctype,)) title = res[0][0] sth = run_sql("select * from sbmCATEGORIES where doctype=%s order by lname",(doctype,)) if len(sth) == 0: categ = "unknown" return selectDocument(doctype,categ, ln = ln) categories = [] for arr in 
sth: waiting = 0 rejected = 0 approved = 0 sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='waiting'", (doctype,arr[1],)) waiting = sth2[0][0] sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='approved'",(doctype,arr[1],)) approved = sth2[0][0] sth2 = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='rejected'",(doctype,arr[1],)) rejected = sth2[0][0] categories.append({ 'waiting' : waiting, 'approved' : approved, 'rejected' : rejected, 'id' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectcateg( ln = ln, categories = categories, doctype = doctype, title = title, ) return t def selectCplxCateg(doctype, ln = CFG_SITE_LANG): t="" res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",(doctype,)) title = res[0][0] sth = run_sql("SELECT * FROM sbmCATEGORIES WHERE doctype=%s ORDER BY lname",(doctype,)) if len(sth) == 0: categ = "unknown" return selectCplxDocument(doctype,categ, "", ln = ln) types = {} for apptype in ('RRP', 'RPB', 'RDA'): for arr in sth: info = {'id' : arr[1], 'desc' : arr[2],} for status in ('waiting', 'rejected', 'approved', 'cancelled'): info[status] = __db_count_doc (doctype, arr[1], status, apptype) types.setdefault (apptype, []).append(info) t = websubmit_templates.tmpl_publiline_selectcplxcateg( ln = ln, types = types, doctype = doctype, title = title, ) return t def selectDocument(doctype,categ, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) title = res[0][0] if categ == "": categ = "unknown" docs = [] sth = run_sql("select rn,status from sbmAPPROVAL where doctype=%s and categ=%s order by status DESC,rn DESC",(doctype,categ)) for arr in sth: docs.append({ 'RN' : arr[0], 'status' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectdocument( ln = ln, doctype = doctype, title = title, categ = categ, docs = docs, ) return t def selectCplxDocument(doctype,categ,apptype, ln = CFG_SITE_LANG): t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) title = res[0][0] sth = run_sql("select lname from sbmCATEGORIES where doctype=%s and sname=%s order by lname",(doctype,categ,)) if len(sth) != 0: categname = sth[0][0] else: categname = "Unknown" docs = [] sth = run_sql("select rn,status from sbmCPLXAPPROVAL where doctype=%s and categ=%s and type=%s order by status DESC,rn DESC",(doctype,categ,apptype)) for arr in sth: docs.append({ 'RN' : arr[0], 'status' : arr[1], }) t = websubmit_templates.tmpl_publiline_selectcplxdocument( ln = ln, doctype = doctype, title = title, categ = categ, categname = categname, docs = docs, apptype = apptype, ) return t def __displayDocument(req, doctype,categ,RN,send, ln = CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) t="" res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) docname = res[0][0] if categ == "": categ = "unknown" sth = run_sql("select rn,status,dFirstReq,dLastReq,dAction,access,note from sbmAPPROVAL where rn=%s",(RN,)) if len(sth) > 0: arr = sth[0] rn = arr[0] status = arr[1] dFirstReq = arr[2] dLastReq = arr[3] dAction = arr[4] access = arr[5] note = arr[6] else: return _("Approval has never been requested for this document.") + "
     " ## Get the details of the pending item: item_details = get_pending_item_details(doctype, RN) ## get_pending_item_details has returned either None or a dictionary ## with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: # Was not found in the pending directory. Already approved? try: (authors, title, sysno) = getInfo(RN) newrn = RN if sysno is None: return _("Unable to display document.") except: return _("Unable to display document.") user_info = collect_user_info(req) can_view_record_p, msg = check_user_can_view_record(user_info, sysno) if can_view_record_p != 0: return msg confirm_send = 0 if send == _("Send Again"): if authors == "unknown" or title == "unknown": SendWarning(doctype, categ, RN, title, authors, access) else: # @todo - send in different languages #SendEnglish(doctype,categ,RN,title,authors,access,sysno) send_approval(doctype, categ, RN, title, authors, access, sysno) run_sql("update sbmAPPROVAL set dLastReq=NOW() where rn=%s",(RN,)) confirm_send = 1 if status == "waiting": if categ == "unknown": ## FIXME: This was necessary for document types without categories, ## such as DEMOBOO: categ = "*" (auth_code, auth_message) = acc_authorize_action(req, "referee",verbose=0,doctype=doctype, categ=categ) else: (auth_code, auth_message) = (None, None) t = websubmit_templates.tmpl_publiline_displaydoc( ln = ln, docname = docname, doctype = doctype, categ = categ, rn = rn, status = status, dFirstReq = dFirstReq, dLastReq = dLastReq, dAction = dAction, access = access, confirm_send = confirm_send, auth_code = auth_code, auth_message = auth_message, authors = authors, title = title, sysno = sysno, newrn = newrn, note = note, ) return t def __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln = CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) t="" uid = getUid(req) res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,)) docname = res[0][0] if categ == "": categ = "unknown" key = (RN, apptype) infos = __db_get_infos (key) if len(infos) > 0: (status, id_group, id_bskBASKET, id_EdBoardGroup, dFirstReq,dLastReq,dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0] dates = {'dFirstReq' : dFirstReq, 'dLastReq' : dLastReq, 'dEdBoardSel' : dEdBoardSel, 'dRefereeSel' : dRefereeSel, 'dRefereeRecom' : dRefereeRecom, 'dEdBoardRecom' : dEdBoardRecom, 'dPubComRecom' : dPubComRecom, 'dProjectLeaderAction' : dProjectLeaderAction, } else: return _("Approval has never been requested for this document.") + "
     " ## Removing call to deprecated "getInAlice" function and replacing it with ## a call to the newer "get_brief_doc_details_from_repository" function: ## try: ## (authors,title,sysno,newrn) = getInAlice(doctype,categ,RN) ## except TypeError: ## return _("Unable to display document.") item_details = get_brief_doc_details_from_repository(RN) ## get_brief_doc_details_from_repository has returned either None ## or a dictionary with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: ## Details of the item were found in the Invenio repository authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: ## Can't find any document details. return _("Unable to display document.") if status == "waiting": isPubCom = __is_PubCom (req, doctype) isEdBoard = __is_EdBoard (uid, id_EdBoardGroup) isReferee = __is_Referee (uid, id_bskBASKET) isProjectLeader = __is_ProjectLeader (req, doctype, categ) isAuthor = __is_Author (uid, sysno) else: isPubCom = None isEdBoard = None isReferee = None isProjectLeader = None isAuthor = None user_info = collect_user_info(req) can_view_record_p, msg = check_user_can_view_record(user_info, sysno) if can_view_record_p != 0: return msg t += websubmit_templates.tmpl_publiline_displaycplxdoc( ln = ln, docname = docname, doctype = doctype, categ = categ, rn = RN, apptype = apptype, status = status, dates = dates, isPubCom = isPubCom, isEdBoard = isEdBoard, isReferee = isReferee, isProjectLeader = isProjectLeader, isAuthor = isAuthor, authors = authors, title = title, sysno = sysno, newrn = newrn, ) if id_bskBASKET > 0: rights = basketdb.get_max_user_rights_on_basket(uid, id_bskBASKET) if not(__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READITM'])): return t # FIXME This error will be fixed with Sam's new version of publiline. 
# pylint: disable=E1101 comments = basketdb.get_comments(id_bskBASKET, sysno) # pylint: enable=E1101 if dProjectLeaderAction != None: user_can_add_comment = 0 else: user_can_add_comment = __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']) comment_subject = "" comment_body = "" if reply == "true": #Get the message subject and body from the comment for comment in comments: if str(commentId) == str(comment[0]): comment_subject = comment[2] comment_body = comment[3] comment_subject = comment_subject.lstrip("Re: ") comment_subject = "Re: " + comment_subject comment_body = "> " + comment_body.replace("\n", "\n> ") t += websubmit_templates.tmpl_publiline_displaycplxdocitem( doctype, categ, RN, apptype, "AddComment", comments, (__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']), user_can_add_comment, __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['DELCMT'])), selected_category=CFG_WEBBASKET_CATEGORIES['GROUP'], selected_topic=0, selected_group_id=id_group, comment_subject=comment_subject, comment_body=comment_body, ln=ln) return t def __check_basket_sufficient_rights(rights_user_has, rights_needed): """Private function, check if the rights are sufficient.""" try: out = CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_user_has) >= \ CFG_WEBBASKET_SHARE_LEVELS_ORDERED.index(rights_needed) except ValueError: out = 0 return out def __is_PubCom (req,doctype): (isPubCom, auth_message) = acc_authorize_action(req, "pubcomchair",verbose=0,doctype=doctype) return isPubCom def __is_EdBoard (uid, id_EdBoardGroup): isEdBoard = None if id_EdBoardGroup > 0: edBoard = run_sql("""SELECT u.id FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE ug.id_usergroup = '%s' and user_status != 'A' AND user_status != 'P'""" % (id_EdBoardGroup, )) for uid_scan in edBoard: if uid == uid_scan[0]: isEdBoard = 0 break return isEdBoard def __is_Referee (uid, id_bskBASKET): isReferee = None if id_bskBASKET > 0: if basketdb.check_user_owns_baskets (uid, id_bskBASKET) == 1: isReferee = 0 return isReferee def __is_ProjectLeader (req, doctype, categ): (isProjectLeader, auth_message) = acc_authorize_action(req, "projectleader",verbose=0,doctype=doctype,categ=categ) return isProjectLeader def __is_Author (uid, sysno): email = Get_Field("8560_f",sysno) email = re.sub("[\n\r ]+","",email) uid_email = re.sub("[\n\r ]+","", acc_get_user_email(uid)) isAuthor = None if (re.search(uid_email,email,re.IGNORECASE) != None) and (uid_email != ""): isAuthor = 0 return isAuthor def __db_count_doc (doctype, categ, status, apptype): return run_sql("SELECT COUNT(*) FROM sbmCPLXAPPROVAL WHERE doctype=%s AND categ=%s AND status=%s AND type=%s",(doctype,categ,status,apptype,))[0][0] def __db_get_infos (key): return run_sql("SELECT status,id_group,id_bskBASKET,id_EdBoardGroup,dFirstReq,dLastReq,dEdBoardSel,dRefereeSel,dRefereeRecom,dEdBoardRecom,dPubComRecom,dProjectLeaderAction FROM sbmCPLXAPPROVAL WHERE rn=%s and type=%s", key) def __db_set_EdBoardSel_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dEdBoardSel=NOW() WHERE rn=%s and type=%s", key) def __db_check_EdBoardGroup ((RN,apptype), id_EdBoardGroup, uid, group_descr): res = get_group_infos (id_EdBoardGroup) if len(res) == 0: id_EdBoardGroup = insert_new_group (uid, RN, group_descr % RN, "VM") run_sql("UPDATE sbmCPLXAPPROVAL SET id_EdBoardGroup=%s WHERE rn=%s and type=%s", (id_EdBoardGroup,RN,apptype,)) return id_EdBoardGroup def __db_set_basket ((RN,apptype), id_bsk): run_sql("UPDATE sbmCPLXAPPROVAL SET 
id_bskBASKET=%s, dRefereeSel=NOW() WHERE rn=%s and type=%s", (id_bsk,RN,apptype,)) def __db_set_RefereeRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dRefereeRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_EdBoardRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dEdBoardRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_PubComRecom_time (key): run_sql("UPDATE sbmCPLXAPPROVAL SET dPubComRecom=NOW() WHERE rn=%s and type=%s", key) def __db_set_status ((RN,apptype), status): run_sql("UPDATE sbmCPLXAPPROVAL SET status=%s, dProjectLeaderAction=NOW() WHERE rn=%s and type=%s", (status,RN,apptype,)) def __doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, reply, commentId, ln=CFG_SITE_LANG): """ Perform complex action. Note: all arguments are supposed to be washed already. Return HTML body for the page. In case of errors, deletes hard drive. ;-) """ # load the right message language _ = gettext_set_language(ln) TEXT_RSN_RefereeSel_BASKET_DESCR = "Requests for refereeing process" TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection" TEXT_RSN_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :" TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication" TEXT_RSN_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication" TEXT_RSN_RefereeRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the referee" TEXT_RSN_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board : " TEXT_RSN_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader" TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT = "You have been selected in an editorial board" TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY = "You have been selected as a member of the editorial board of this document :" TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR = "Editorial board for %s" TEXT_RPB_RefereeSel_BASKET_DESCR = "Requests for publication" TEXT_RPB_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection" TEXT_RPB_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :" TEXT_RPB_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication" TEXT_RPB_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication" TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT = "Final recommendation from the referee" TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the editorial board" TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board" TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader" t="" uid = getUid(req) if categ == "": categ = "unknown" key = (RN, apptype) infos = __db_get_infos (key) if len(infos) > 0: (status, id_group, id_bskBASKET, id_EdBoardGroup, dummy, dummy, dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0] else: return _("Approval has never been requested for this document.") + "
     " ## Removing call to deprecated "getInAlice" function and replacing it with ## a call to the newer "get_brief_doc_details_from_repository" function: ## try: ## (authors,title,sysno,newrn) = getInAlice(doctype,categ,RN) ## except TypeError: ## return _("Unable to display document.") item_details = get_brief_doc_details_from_repository(RN) ## get_brief_doc_details_from_repository has returned either None ## or a dictionary with the following structure: ## { 'title' : '-', ## String - the item's title ## 'recid' : '', ## String - recid ## 'report-number' : '', ## String - the item's report number ## 'authors' : [], ## List - the item's authors ## } if item_details is not None: ## Details of the item were found in the Invenio repository authors = ", ".join(item_details['authors']) newrn = item_details['report-number'] title = item_details['title'] sysno = item_details['recid'] else: ## Can't find any document details. return _("Unable to display document.") if (action == "EdBoardSel") and (apptype == "RPB"): if __is_PubCom (req, doctype) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " if validate == "go": if dEdBoardSel == None: __db_set_EdBoardSel_time (key) perform_request_send (uid, "", RN, TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT, TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY) return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) id_EdBoardGroup = __db_check_EdBoardGroup (key, id_EdBoardGroup, uid, TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR) subtitle1 = _('Adding users to the editorial board') # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) stopon1 = "" stopon2 = "" stopon3 = "" users = [] extrausers = [] # pattern is entered if email_user_pattern: # users with matching email-address try: users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email """, (email_user_pattern, )) except OperationalError: users1 = () # users that are connected try: users2 = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE u.email<>'' AND ug.id_usergroup = %s AND u.email RLIKE %s ORDER BY u.email """, (id_EdBoardGroup, email_user_pattern)) except OperationalError: users2 = () # no users that match the pattern if not (users1 or users2): stopon1 = '

    %s

    ' % _("no qualified users, try new search.") elif len(users1) > MAXSELECTUSERS: stopon1 = '

    %s %s, %s (%s %s)

    ' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS) # show matching users else: users = [] extrausers = [] for (user_id, email) in users1: if (user_id, email) not in users2: users.append([user_id,email,'']) for (user_id, email) in users2: extrausers.append([-user_id, email,'']) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user) if id_user < 0: stopon2 = '

    %s

    ' % _("users in brackets are already attached to the role, try another one...") # a user is selected elif email_out: result = insert_new_member(id_user, id_EdBoardGroup, "M") stopon2 = '

    confirm: user %s added to the editorial board.

    ' % (email_out, ) subtitle2 = _('Removing users from the editorial board') usersremove = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user WHERE u.email <> "" AND ug.id_usergroup = %s and user_status != 'A' AND user_status != 'P' ORDER BY u.email """, (id_EdBoardGroup, )) try: id_user_remove = int(id_user_remove) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user_remove) # a user is selected if email_out: result = delete_member(id_EdBoardGroup, id_user_remove) stopon3 = '

    confirm: user %s removed from the editorial board.

    ' % (email_out, ) t = websubmit_templates.tmpl_publiline_displaydocplxaction ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, authors = authors, title = title, sysno = sysno, subtitle1 = subtitle1, email_user_pattern = email_user_pattern, stopon1 = stopon1, users = users, extrausers = extrausers, stopon2 = stopon2, subtitle2 = subtitle2, usersremove = usersremove, stopon3 = stopon3, validate_btn = _("Validate the editorial board selection"), ) return t elif (action == "RefereeSel") and ((apptype == "RRP") or (apptype == "RPB")): if apptype == "RRP": to_check = __is_PubCom (req, doctype) TEXT_RefereeSel_BASKET_DESCR = TEXT_RSN_RefereeSel_BASKET_DESCR TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RSN_RefereeSel_MSG_REFEREE_BODY + " " + "\"" + item_details['title'] + "\"" TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RSN_RefereeSel_MSG_GROUP_BODY + " " + "\"" + item_details['title'] + "\"" elif apptype == "RPB": to_check = __is_EdBoard (uid, id_EdBoardGroup) TEXT_RefereeSel_BASKET_DESCR = TEXT_RSN_RefereeSel_BASKET_DESCR TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RSN_RefereeSel_MSG_REFEREE_BODY + " " + "\"" + item_details['title'] + "\"" TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RSN_RefereeSel_MSG_GROUP_BODY + " " + "\"" + item_details['title'] + "\"" else: to_check = None if to_check != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " if validate == "go": if dRefereeSel == None: id_bsk = basketdb.create_basket (int(id_user_val), RN, TEXT_RefereeSel_BASKET_DESCR) basketdb.share_basket_with_group (id_bsk, id_group, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']) basketdb.add_to_basket (int(id_user_val), (sysno, ), (id_bsk, )) __db_set_basket (key, id_bsk) email_address = run_sql("""SELECT email FROM user WHERE id = %s """, (id_user_val, ))[0][0] perform_request_send (uid, email_address, "", TEXT_RefereeSel_MSG_REFEREE_SUBJECT, TEXT_RefereeSel_MSG_REFEREE_BODY, 0, 0, 0, ln, 1) sendMailToReferee(doctype,categ,RN,email_address,authors) group_name = run_sql("""SELECT name FROM usergroup WHERE id = %s""", (id_group, ))[0][0] perform_request_send (int(id_user_val), "", group_name, TEXT_RefereeSel_MSG_GROUP_SUBJECT, TEXT_RefereeSel_MSG_GROUP_BODY) sendMailToGroup(doctype,categ,RN,id_group,authors) return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) subtitle1 = _('Referee selection') # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) stopon1 = "" stopon2 = "" users = [] extrausers = [] # pattern is entered if email_user_pattern: # users with matching email-address try: users1 = run_sql("""SELECT id, email FROM user WHERE email <> "" AND email RLIKE %s ORDER BY email """, (email_user_pattern, )) except OperationalError: users1 = () # no users that match the pattern if not users1: stopon1 = '

    %s

    ' % _("no qualified users, try new search.") elif len(users1) > MAXSELECTUSERS: stopon1 = '

    %s %s, %s (%s %s)

    ' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS) # show matching users else: users = [] for (user_id, email) in users1: users.append([user_id,email,'']) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role email_out = acc_get_user_email(id_user) # a user is selected if email_out: stopon2 = """

    user %s will be the referee?

    """ % (email_out, id_user) t = websubmit_templates.tmpl_publiline_displaydocplxaction ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, authors = authors, title = title, sysno = sysno, subtitle1 = subtitle1, email_user_pattern = email_user_pattern, stopon1 = stopon1, users = users, extrausers = [], stopon2 = stopon2, subtitle2 = "", usersremove = [], stopon3 = "", validate_btn = "", ) return t elif (action == "AddAuthorList") and (apptype == "RPB"): return "" elif (action == "AddComment") and ((apptype == "RRP") or (apptype == "RPB")): t = "" if validate == "go": (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) t += "%(infos)s

    " % {'infos' : infos[0]} t += """
    """ % {'doctype' : doctype, 'categ' : categ, 'rn' : RN, 'apptype' : apptype, 'button_label' : _("Come back to the document"), } return t elif (action == "RefereeRecom") and ((apptype == "RRP") or (apptype == "RPB")): if __is_Referee (uid, id_bskBASKET) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " if apptype == "RRP": # Build publication committee chair's email address user_addr = "" # Try to retrieve the publication committee chair's email from the role database for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general publication committee chair's for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)): user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) group_addr = "" TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RSN_RefereeRecom_MSG_PUBCOM_SUBJECT elif apptype == "RPB": user_addr = "" group_addr = RN TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT else: user_addr = "" group_addr = "" TEXT_RefereeRecom_MSG_SUBJECT = "" if validate == "approve" or validate == "reject": if dRefereeRecom == None: perform_request_send (uid, user_addr, group_addr, msg_subject, msg_body, 0, 0, 0, ln, 1) if validate == "approve": msg_body = "Approved : " + msg_body else: msg_body = "Rejected : " + msg_body #Get the Project Leader's email address # email = "" # for user in acc_get_role_users(acc_get_role_id("projectleader_%s_%s" % (doctype,categ))): # email += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # sendMailToProjectLeader(doctype, categ, RN, email, authors, "referee", msg_body) sendMailtoCommitteeChair(doctype, categ, RN, user_addr, authors) __db_set_RefereeRecom_time (key) return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = group_addr, msg_subject = TEXT_RefereeRecom_MSG_SUBJECT, ) return t elif (action == "EdBoardRecom") and (apptype == "RPB"): if __is_EdBoard (uid, id_EdBoardGroup) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " # Build publication committee chair's email address user_addr = "" # Try to retrieve the publication committee chair's email from the role database for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general publication committee chair's for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)): user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) if validate == "go": if dEdBoardRecom == None: perform_request_send (uid, user_addr, "", msg_subject, msg_body) __db_set_EdBoardRecom_time (key) return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = "", msg_subject = TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT, ) return t elif (action == "PubComRecom") and ((apptype == "RRP") or (apptype == "RPB")): if __is_PubCom (req, doctype) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " # Build project leader's email address user_addr = "" # Try to retrieve the project leader's email from the role database for user in acc_get_role_users(acc_get_role_id("projectleader_%s_%s" % (doctype,categ))): user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + "," # And if there are general project leader's for user in acc_get_role_users(acc_get_role_id("projectleader_%s_*" % doctype)): user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + "," user_addr = re.sub(",$","",user_addr) if apptype == "RRP": TEXT_PubComRecom_MSG_SUBJECT = TEXT_RSN_PubComRecom_MSG_PRJLEADER_SUBJECT elif apptype == "RPB": group_addr = RN TEXT_PubComRecom_MSG_SUBJECT = TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT else: TEXT_PubComRecom_MSG_SUBJECT = "" if validate == "approve" or validate == "reject": if validate == "approve": msg_body = "Approved : " + msg_body else: msg_body = "Rejected : " + msg_body if dPubComRecom == None: perform_request_send (uid, user_addr, "", msg_subject, msg_body, 0, 0, 0, ln, 1) sendMailToProjectLeader(doctype, categ, RN, user_addr, authors, "publication committee chair", msg_body) __db_set_PubComRecom_time (key) return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) t = websubmit_templates.tmpl_publiline_displaycplxrecom ( ln = ln, doctype = doctype, categ = categ, rn = RN, apptype = apptype, action = action, status = status, authors = authors, title = title, sysno = sysno, msg_to = user_addr, msg_to_group = "", msg_subject = TEXT_PubComRecom_MSG_SUBJECT + " " + "\"" + item_details['title'] + "\"", ) return t elif (action == "ProjectLeaderDecision") and ((apptype == "RRP") or (apptype == "RPB")): if __is_ProjectLeader (req, doctype, categ) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " t += """
    """ % {'doctype' : doctype, 'categ' : categ, 'rn' : RN, 'apptype' : apptype, 'button_label' : _("Back to the document"), } if validate == "approve": if dProjectLeaderAction == None: (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) out = "%(infos)s

    " % {'infos' : infos[0]} sendMailToSubmitter(doctype, categ, RN, "approved") __db_set_status (key, 'approved') return out + t elif validate == "reject": if dProjectLeaderAction == None: (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln) out = "%(infos)s

    " % {'infos' : infos[0]} sendMailToSubmitter(doctype, categ, RN, "rejected") __db_set_status (key, 'rejected') return out + t validation = """ """ % {'select' : _('Select:'), 'approve' : _('Approve'), 'reject' : _('Reject'), 'button_label' : _('Take a decision'), } if apptype == "RRP": TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RSN_ProjectLeaderDecision_MSG_SUBJECT elif apptype == "RPB": TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT else: TEXT_ProjectLeaderDecision_MSG_SUBJECT = "" t = websubmit_templates.tmpl_publiline_displaywritecomment(doctype, categ, RN, apptype, action, _("Take a decision"), TEXT_ProjectLeaderDecision_MSG_SUBJECT, validation, "", ln) return t elif (action == "ProjectLeaderDecision") and (apptype == "RDA"): if __is_ProjectLeader (req, doctype, categ) != 0: return _("Action unauthorized for this document.") + "
     " if status == "cancelled": return _("Action unavailable for this document.") + "
     " if validate == "approve": if dProjectLeaderAction == None: __db_set_status (key, 'approved') return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) elif validate == "reject": if dProjectLeaderAction == None: __db_set_status (key, 'rejected') return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) t = """

    """ % { 'rn' : RN, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'approve' : _('Approve'), 'reject' : _('Reject'), } return t elif (action == "AuthorCancel") and ((apptype == "RRP") or (apptype == "RPB") or (apptype == "RDA")): if __is_Author (uid, sysno) != 0: return _("Action unauthorized for this document.") + "
     " if (status == "cancelled") or (dProjectLeaderAction != None): return _("Action unavailable for this document.") + "
     " if validate == "go": __db_set_status (key, 'cancelled') return __displayCplxDocument(req, doctype,categ,RN,apptype, reply, commentId, ln) t = """

    """ % { 'rn' : RN, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'cancel' : _('Cancel'), } return t else: return _("Wrong action for this document.") + "
     " return t def get_pending_item_details(doctype, reportnumber): """Given a doctype and reference number, try to retrieve an item's details. The first place to search for them should be the WebSubmit pending directory. If nothing is retrieved from there, and attempt is made to retrieve them from the Invenio repository itself. @param doctype: (string) - the doctype of the item for which brief details are to be retrieved. @param reportnumber: (string) - the report number of the item for which details are to be retrieved. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found a NoneType is returned. """ ## First try to get the details of a document from the pending dir: item_details = get_brief_doc_details_from_pending(doctype, \ reportnumber) if item_details is None: item_details = get_brief_doc_details_from_repository(reportnumber) ## Return the item details: return item_details def get_brief_doc_details_from_pending(doctype, reportnumber): """Try to get some brief details about the submission that is awaiting the referee's decision. Details sought are: + title + Authors + recid (why?) + report-number (why?) This function searches for a MARC XML record in the pending submission's working directory. It prefers the so-called 'dummy' record, but will search for the final MARC XML record that would usually be passed to bibupload (i.e. recmysql) if that is not present. If neither of these records are present, no details will be found. @param doctype: (string) - the WebSubmit document type of the item to be refereed. It is used in order to locate the submission's working directory in the WebSubmit pending directory. @param reportnumber: (string) - the report number of the item for which details are to be recovered. It is used in order to locate the submission's working directory in the WebSubmit pending directory. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found (i.e. no MARC XML files in the submission's working directory), a NoneType is returned. """ pending_doc_details = None marcxml_rec_name = None ## Check for a MARC XML record in the pending dir. ## If it's there, we will use it to obtain certain bibliographic ## information such as title, author(s), etc, which we will then ## display to the referee. ## We favour the "dummy" record (created with the WebSubmit function ## "Make_Dummy_MARC_XML_Record"), because it was made for this ## purpose. If it's not there though, we'll take the normal ## (final) recmysql record that would generally be passed to bibupload. if os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ CFG_WEBSUBMIT_DUMMY_MARC_XML_REC), \ os.F_OK|os.R_OK): ## Found the "dummy" marc xml record in the submission dir. 
## Use it: marcxml_rec_name = CFG_WEBSUBMIT_DUMMY_MARC_XML_REC elif os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ CFG_WEBSUBMIT_MARC_XML_REC), \ os.F_OK|os.R_OK): ## Although we didn't find the "dummy" marc xml record in the ## submission dir, we did find the "real" one (that which would ## normally be passed to bibupload). Use it: marcxml_rec_name = CFG_WEBSUBMIT_MARC_XML_REC ## If we have a MARC XML record in the pending submission's ## working directory, go ahead and use it: if marcxml_rec_name is not None: try: fh_marcxml_record = open("%s/%s/%s/%s" \ % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ marcxml_rec_name), "r") xmltext = fh_marcxml_record.read() fh_marcxml_record.close() except IOError: ## Unfortunately, it wasn't possible to read the details of the ## MARC XML record. Register the exception. exception_prefix = "Error: Publiline was unable to read the " \ "MARC XML record [%s/%s/%s/%s] when trying to " \ "use it to recover details about a pending " \ "submission." % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber, \ marcxml_rec_name) register_exception(prefix=exception_prefix) else: ## Attempt to use bibrecord to create an internal representation ## of the record, from which we can extract certain bibliographic ## information: records = create_records(xmltext, 1, 1) try: record = records[0][0] if record is None: raise ValueError except (IndexError, ValueError): ## Bibrecord couldn't successfully represent the record ## contained in the xmltext string. The record must have ## been empty or badly formed (or something). pass else: ## Dictionary to hold the interesting details of the ## pending item: pending_doc_details = { 'title' : '-', 'recid' : '', 'report-number' : '', 'authors' : [], } ## Get the recid: ## Note - the old "getInPending" function reads the "SN" ## file from the submission's working directory and since ## the "SN" file is currently "magic" and hardcoded ## throughout WebSubmit, I'm going to stick to this model. ## I could, however, have tried to get it from the MARC XML ## record as so: ## recid = record_get_field_value(rec=record, tag="001") try: fh_recid = open("%s/%s/%s/SN" \ % (CFG_WEBSUBMIT_PENDING_DIR, \ doctype, \ reportnumber), "r") recid = fh_recid.read() fh_recid.close() except IOError: ## Probably, there was no "SN" file in the submission's ## working directory. pending_doc_details['recid'] = "" else: pending_doc_details['recid'] = recid.strip() ## Item report number (from record): ## Note: I don't know what purpose this serves. It appears ## to be used in the email that is sent to the author, but ## it seems funny to me, since we already have the report ## number (which is indeed used to find the submission's ## working directory in pending). Perhaps it's used for ## cases when the reportnumber is changed after approval? 
## To investigate when time allows: finalrn = record_get_field_value(rec=record, \ tag="037", \ code="a") if finalrn != "": pending_doc_details['report-number'] = finalrn ## Item title: title = record_get_field_value(rec=record, \ tag="245", \ code="a") if title != "": pending_doc_details['title'] = title else: ## Alternative title: alt_title = record_get_field_value(rec=record, \ tag="246", \ ind1="1", \ code="a") if alt_title != "": pending_doc_details['title'] = alt_title ## Item first author: first_author = record_get_field_value(rec=record, \ tag="100", \ code="a") if first_author != "": pending_doc_details['authors'].append(first_author) ## Other Authors: other_authors = record_get_field_values(rec=record, \ tag="700", \ code="a") for author in other_authors: pending_doc_details['authors'].append(author) ## Return the details discovered about the pending document: return pending_doc_details def get_brief_doc_details_from_repository(reportnumber): """Try to get some brief details about the submission that is awaiting the referee's decision. Details sought are: + title + Authors + recid (why?) + report-number (why?) + email This function searches in the Invenio repository, based on "reportnumber" for a record and then pulls the interesting fields from it. @param reportnumber: (string) - the report number of the item for which details are to be recovered. It is used in the search. @return: (dictionary or None) - If details are found for the item, they will be returned in a dictionary structured as follows: { 'title' : '-', ## String - the item's title 'recid' : '', ## String - recid taken from the SN file 'report-number' : '', ## String - the item's report number 'authors' : [], ## List - the item's authors } If no details were found a NoneType is returned. """ ## Details of the pending document, as found in the repository: pending_doc_details = None ## Search for records matching this "report number" found_record_ids = list(search_pattern(req=None, \ p=reportnumber, \ f="reportnumber", \ m="e")) ## How many records were found? if len(found_record_ids) == 1: ## Found only 1 record. Get the fields of interest: pending_doc_details = { 'title' : '-', 'recid' : '', 'report-number' : '', 'authors' : [], 'email' : '', } recid = found_record_ids[0] ## Authors: first_author = get_fieldvalues(recid, "100__a") for author in first_author: pending_doc_details['authors'].append(author) other_authors = get_fieldvalues(recid, "700__a") for author in other_authors: pending_doc_details['authors'].append(author) ## Title: title = get_fieldvalues(recid, "245__a") if len(title) > 0: pending_doc_details['title'] = title[0] else: ## There was no value for title - check for an alternative title: alt_title = get_fieldvalues(recid, "2641_a") if len(alt_title) > 0: pending_doc_details['title'] = alt_title[0] ## Record ID: pending_doc_details['recid'] = recid ## Report Number: reptnum = get_fieldvalues(recid, "037__a") if len(reptnum) > 0: pending_doc_details['report-number'] = reptnum[0] ## Email: email = get_fieldvalues(recid, "859__f") if len(email) > 0: pending_doc_details['email'] = email[0] elif len(found_record_ids) > 1: ## Oops. This is unexpected - there shouldn't be multiple matches ## for this item. The old "getInAlice" function would have simply ## taken the first record in the list. That's not very nice though. ## Some kind of warning or error should be raised here. FIXME.
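## A sketch of how the FIXME above might be addressed (illustrative only),
## using the register_exception helper that this module already imports
## from invenio.errorlib:
##
##     register_exception(prefix="publiline: %d records match report "
##                               "number %s; details cannot be recovered "
##                               "unambiguously."
##                               % (len(found_record_ids), reportnumber))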
# Retrieve info about document
def getInfo(RN):
    """
    Retrieve basic info from record with given report number.
    Returns (authors, title, sysno)
    """
    authors = None
    title = None
    sysno = None

    recids = search_pattern(p=RN, f='037__a')
    if len(recids) == 1:
        sysno = int(recids.tolist()[0])
        authors = ','.join(get_fieldvalues(sysno, "100__a") + \
                           get_fieldvalues(sysno, "700__a"))
        title = ','.join(get_fieldvalues(sysno, "245__a"))
    return (authors, title, sysno)

#seek info in pending directory
def getInPending(doctype, categ, RN):
    """FIXME: DEPRECATED!"""
    PENDIR = "%s/pending" % CFG_WEBSUBMIT_STORAGEDIR
    if os.path.exists("%s/%s/%s/AU" % (PENDIR, doctype, RN)):
        fp = open("%s/%s/%s/AU" % (PENDIR, doctype, RN), "r")
        authors = fp.read()
        fp.close()
    else:
        authors = ""
    if os.path.exists("%s/%s/%s/TI" % (PENDIR, doctype, RN)):
        fp = open("%s/%s/%s/TI" % (PENDIR, doctype, RN), "r")
        title = fp.read()
        fp.close()
    else:
        title = ""
    if os.path.exists("%s/%s/%s/SN" % (PENDIR, doctype, RN)):
        fp = open("%s/%s/%s/SN" % (PENDIR, doctype, RN), "r")
        sysno = fp.read()
        fp.close()
    else:
        sysno = ""
    if title == "" and os.path.exists("%s/%s/%s/TIF" % (PENDIR, doctype, RN)):
        fp = open("%s/%s/%s/TIF" % (PENDIR, doctype, RN), "r")
        title = fp.read()
        fp.close()
    if title == "":
        return 0
    else:
        return (authors, title, sysno, "")

#seek info in Alice database
def getInAlice(doctype, categ, RN):
    """FIXME: DEPRECATED!"""
    # initialize sysno variable
    sysno = ""
    searchresults = list(search_pattern(req=None, p=RN, f="reportnumber"))
    if len(searchresults) == 0:
        return 0
    sysno = searchresults[0]
    if sysno != "":
        title = Get_Field('245__a', sysno)
        emailvalue = Get_Field('8560_f', sysno)
        authors = Get_Field('100__a', sysno)
        authors += "\n%s" % Get_Field('700__a', sysno)
        newrn = Get_Field('037__a', sysno)
        return (authors, title, sysno, newrn)
    else:
        return 0
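## Illustrative example (editor's sketch, not part of the original module;
## the values are hypothetical). getInfo() joins the first author (100__a)
## and any additional authors (700__a) into one comma-separated string:
##
##   >>> getInfo("DEMO-THESIS-2011-001")
##   ('Doe, J.,Smith, A.', 'Some pending title', 42)
##
## If the report number does not match exactly one record, all three
## elements of the returned tuple stay None.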
def SendEnglish(doctype, categ, RN, title, authors, access, sysno):
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    # retrieve useful information from webSubmit configuration
    res = run_sql("select value from sbmPARAMETERS where name='categformatDAM' and doctype=%s", (doctype,))
    categformat = res[0][0]
    ## categformatDAM is a report-number pattern containing a "<CATEG>"
    ## placeholder; substituting a capturing group for the placeholder
    ## lets us extract the category from the report number:
    categformat = re.sub("<CATEG>", "([^-]*)", categformat)
    categs = re.match(categformat, RN)
    if categs is not None:
        categ = categs.group(1)
    else:
        categ = "unknown"
    res = run_sql("select value from sbmPARAMETERS where name='addressesDAM' and doctype=%s", (doctype,))
    if len(res) > 0:
        otheraddresses = res[0][0]
        otheraddresses = otheraddresses.replace("<CATEG>", categ)
    else:
        otheraddresses = ""
    # Build referee's email address
    refereeaddress = ""
    # Try to retrieve the referee's email from the referee's database
    for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype, categ))):
        refereeaddress += user[1] + ","
    # And if there are general referees
    for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
        refereeaddress += user[1] + ","
    refereeaddress = re.sub(",$", "", refereeaddress)
    # Creation of the mail for the referee
    addresses = ""
    if refereeaddress != "":
        addresses = refereeaddress + ","
    if otheraddresses != "":
        addresses += otheraddresses
    else:
        addresses = re.sub(",$", "", addresses)
    if addresses == "":
        SendWarning(doctype, categ, RN, title, authors, access)
        return 0
    if authors == "":
        authors = "-"
    res = run_sql("select value from sbmPARAMETERS where name='directory' and doctype=%s", (doctype,))
    directory = res[0][0]
    message = """
The document %s has been published as a Communication.
Your approval is requested for it to become an official Note.

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s/files/>

To approve/reject the document, you should go to this URL:
<%s/approve.py?%s>

---------------------------------------------
Best regards.
The submission team.""" % (RN, title, authors, CFG_SITE_URL, CFG_SITE_RECORD,
                           sysno, CFG_SITE_URL, access)
    # send the mail
    send_email(FROMADDR, addresses, "Request for Approval of %s" % RN,
               message, footer="")
    return ""

def send_approval(doctype, categ, rn, title, authors, access, sysno):
    fromaddr = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    if not categ:
        categ = "nocategory"
    if not doctype:
        doctype = "nodoctype"
    addresses = acc_get_authorized_emails('referee', categ=categ, doctype=doctype)
    if not addresses:
        return SendWarning(doctype, categ, rn, title, authors, access)
    if not authors:
        authors = "-"
    message = """
The document %s has been published as a Communication.
Your approval is requested for it to become an official Note.

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/record/%s/files/>

As a referee for this document, you may approve or reject it from the
submission interface:
<%s/submit?doctype=%s>

---------------------------------------------
Best regards.
The submission team.""" % (rn, title, authors, CFG_SITE_URL, sysno,
                           CFG_SITE_URL, doctype)
    # send the mail
    return send_email(fromaddr, ', '.join(addresses),
                      "Request for Approval of %s" % rn, message, footer="")

def SendWarning(doctype, categ, RN, title, authors, access):
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = "Failed sending approval email request for %s" % RN
    # send the mail
    send_email(FROMADDR, CFG_SITE_ADMIN_EMAIL,
               "Failed sending approval email request", message)
    return ""

def errorMsg(title, req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG):
    return page(title="error",
                body=create_error_box(req, title=title, verbose=0, ln=ln),
                description="%s - Internal Error" % c,
                keywords="%s, Internal Error" % c,
                uid=getUid(req),
                language=ln,
                req=req,
                navmenuid='yourapprovals')

def warningMsg(title, req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG):
    return page(title="warning",
                body=title,
                description="%s - Internal Error" % c,
                keywords="%s, Internal Error" % c,
                uid=getUid(req),
                language=ln,
                req=req,
                navmenuid='yourapprovals')
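## Editor's note on the role convention used in SendEnglish() above:
## referees are resolved through WebAccess roles named
## "referee_<doctype>_<categ>" for a specific category and
## "referee_<doctype>_*" for doctype-wide referees. For a hypothetical
## doctype "DEMO" with category "THESIS" the lookups would be:
##
##   acc_get_role_users(acc_get_role_id("referee_DEMO_THESIS"))
##   acc_get_role_users(acc_get_role_id("referee_DEMO_*"))
##
## with each returned user tuple carrying the email address in position 1,
## which is how the comma-separated referee address list is built.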
def sendMailToReferee(doctype, categ, RN, email, authors):
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ## { 'title'         : '-', ## String - the item's title
    ##   'recid'         : '',  ## String - recid
    ##   'report-number' : '',  ## String - the item's report number
    ##   'authors'       : [],  ## List   - the item's authors
    ## }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your recommendation is requested on it.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

To make a recommendation, you should go to this URL:
<%s>

You can also check the status of the document:
<%s>

---------------------------------------------
Best regards.
The submission team.""" % (str(RN),
                           str(categ),
                           str(item_details['title']),
                           authors,
                           CFG_SITE_URL,
                           CFG_SITE_RECORD,
                           str(item_details['recid']),
                           str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN + "&action=RefereeRecom"),
                           str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN))
    # send the mail
    send_email(FROMADDR, email,
               "Request for document %s recommendation" % (RN), message)
    return ""

def sendMailToGroup(doctype, categ, RN, group_id, authors):
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ## { 'title'         : '-', ## String - the item's title
    ##   'recid'         : '',  ## String - recid
    ##   'report-number' : '',  ## String - the item's report number
    ##   'authors'       : [],  ## List   - the item's authors
    ## }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your comments are requested on this document.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

To leave a comment or check the status of the approval process, you should go to this URL:
<%s>
""" % (str(RN),
       str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN))
    # send mails to all members of the group
    group_member_ids = run_sql("SELECT id_user FROM user_usergroup WHERE id_usergroup=%s", (group_id,))
    for member_id in group_member_ids:
        member_email = run_sql("SELECT email FROM user WHERE id=%s", (member_id[0],))
        if not member_email[0][0] == "info@invenio-software.org":
            send_email(FROMADDR, member_email[0][0],
                       "Request for comment on document %s" % (RN), message)
    return ""
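## Editor's note (sketch; parameter values hypothetical): the workflow URLs
## built by the sendMail* helpers all follow the pattern
##
##   <CFG_SITE_URL>/publiline.py?flow=cplx&doctype=DEMO&ln=en&apptype=RRP
##       &categ=THESIS&RN=DEMO-THESIS-2011-001[&action=RefereeRecom]
##
## where the optional "action" argument selects the workflow step to show
## (e.g. RefereeRecom for the referee, ProjectLeaderDecision for the
## project leader); omitting it points at the document's approval status.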
def sendMailToProjectLeader(doctype, categ, RN, email, authors, actor, recommendation):
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ## { 'title'         : '-', ## String - the item's title
    ##   'recid'         : '',  ## String - recid
    ##   'report-number' : '',  ## String - the item's report number
    ##   'authors'       : [],  ## List   - the item's authors
    ## }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your approval is requested for this document. Once you have received
recommendations from both the referee and the publication committee chair,
you will be able to make your decision.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

The %s has made a recommendation for the document. He/she said the following:

%s

You can approve this document by visiting this page:
<%s>

You can also check the status of the document from:
<%s>
""" % (str(RN),
       str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       actor,
       recommendation,
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN + "&action=ProjectLeaderDecision"),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN))
    # send the mail
    send_email(FROMADDR, email,
               "Request for approval/rejection of document %s" % (RN), message)
    return ""

def sendMailToSubmitter(doctype, categ, RN, outcome):
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ## { 'title'         : '-', ## String - the item's title
    ##   'recid'         : '',  ## String - recid
    ##   'report-number' : '',  ## String - the item's report number
    ##   'authors'       : [],  ## List   - the item's authors
    ## }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
The approval process for your document : %s, has been completed. The details
of this document are as follows:

Requested subcategory: %s

Title: %s

The project leader has made the following recommendation for the document:

%s
""" % (RN, categ, item_details['title'], outcome)
    # send the mail
    send_email(FROMADDR, item_details['email'],
               "Final outcome for approval of document : %s" % (RN), message)
    return ""

def sendMailtoCommitteeChair(doctype, categ, RN, email, authors):
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ## { 'title'         : '-', ## String - the item's title
    ##   'recid'         : '',  ## String - recid
    ##   'report-number' : '',  ## String - the item's report number
    ##   'authors'       : [],  ## List   - the item's authors
    ## }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
The referee assigned to the document detailed below has made a
recommendation. You are now requested to make a recommendation of your own.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

You can make a recommendation by visiting this page:
<%s>
""" % (str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype=" + doctype + "&ln=en&apptype=RRP&categ=" + categ + "&RN=" + RN))
    # send the mail
    send_email(FROMADDR, email,
               "Request for recommendation of document %s" % (RN), message)