diff --git a/modules/webalert/lib/alert_engine.py b/modules/webalert/lib/alert_engine.py index 850e599c1..8c02e9bca 100644 --- a/modules/webalert/lib/alert_engine.py +++ b/modules/webalert/lib/alert_engine.py @@ -1,408 +1,408 @@ ## $Id$ ## Alert engine implementation. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Alert engine implementation.""" ## rest of the Python code goes below __version__ = "$Id$" from cgi import parse_qs from sre import search, sub from time import localtime, strftime, mktime, sleep from string import split import smtplib import datetime from email.Header import Header from email.Message import Message from email.MIMEText import MIMEText from cdsware.config import * from cdsware.search_engine import perform_request_search from cdsware.dbquery import run_sql from cdsware.htmlparser import * import cdsware.template webalert_templates = cdsware.template.load('webalert') DEVELOPERADDR = [supportemail] # Debug levels: # 0 = production, nothing on the console, email sent # 1 = messages on the console, email sent # 2 = messages on the console, but no email sent # 3 = many messages on the console, no email sent # 4 = many messages on the console, email sent to DEVELOPERADDR DEBUGLEVEL = 0 def update_date_lastrun(alert): return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],)) def get_alert_queries(frequency): return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,)) def get_alert_queries_for_user(uid): return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,)) def get_alerts(query, frequency): r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,)) return {'alerts': r, 'records': query['records'], 'argstr': query['argstr'], 'date_from': query['date_from'], 'date_until': query['date_until']} # def add_record_to_basket(record_id, basket_id): # if DEBUGLEVEL > 0: # print "-> adding 
record %s into basket %s" % (record_id, basket_id) # try: # return run_sql('insert into basket_record (id_basket, id_record) values(%s, %s);', (basket_id, record_id,)) # except: # return 0 # def add_records_to_basket(record_ids, basket_id): # # TBD: generate the list and all all records in one step (see below) # for i in record_ids: # add_record_to_basket(i, basket_id) # Optimized version: def add_records_to_basket(record_ids, basket_id): nrec = len(record_ids) if nrec > 0: vals = '(%s,%s)' % (basket_id, record_ids[0]) if nrec > 1: for i in record_ids[1:]: vals += ',(%s, %s)' % (basket_id, i) if DEBUGLEVEL > 0: print "-> adding %s records into basket %s: %s" % (nrec, basket_id, vals) try: if DEBUGLEVEL < 4: return run_sql('insert into basket_record (id_basket, id_record) values %s;' % vals) # Cannot use the run_sql(, (,)) form for some reason else: print ' NOT ADDED, DEBUG LEVEL == 4' return 0 except: return 0 else: return 0 def get_email(uid): r = run_sql('select email from user where id=%s', (uid,)) return r[0][0] def get_query(alert_id): r = run_sql('select urlargs from query where id=%s', (alert_id,)) return r[0][0] def send_email(fromaddr, toaddr, body, attempt=0): if attempt > 2: log('error sending email to %s: SMTP error; gave up after 3 attempts' % toaddr) return try: server = smtplib.SMTP('localhost') if DEBUGLEVEL > 2: server.set_debuglevel(1) else: server.set_debuglevel(0) server.sendmail(fromaddr, toaddr, body) server.quit() except: if (DEBUGLEVEL > 1): print 'Error connecting to SMTP server, attempt %s retrying in 5 minutes. 
Exception raised: %s' % (attempt, sys.exc_info()[0]) sleep(300) send_email(fromaddr, toaddr, body, attempt+1) return def forge_email(fromaddr, toaddr, subject, content): msg = MIMEText(content, _charset='utf-8') msg['From'] = fromaddr msg['To'] = toaddr msg['Subject'] = Header(subject, 'utf-8') return msg.as_string() def email_notify(alert, records, argstr): if len(records) == 0: return msg = "" if DEBUGLEVEL > 0: msg = "*** THIS MESSAGE WAS SENT IN DEBUG MODE ***\n\n" url = weburl + "/search.py?" + argstr # Extract the pattern and catalogue list from the formatted query query = parse_qs(argstr) pattern = query.get('p', [''])[0] catalogues = query.get('c', []) frequency = alert[3] msg += webalert_templates.tmpl_alert_email_body( alert[5], url, records, pattern, catalogues, frequency) email = get_email(alert[0]) msg = MIMEText(msg, _charset='utf-8') msg['To'] = email # Let the template fill in missing fields webalert_templates.tmpl_alert_email_headers(alert[5], msg) sender = msg['From'] body = msg.as_string() if DEBUGLEVEL > 0: print "********************************************************************************" print body print "********************************************************************************" if DEBUGLEVEL < 2: send_email(sender, email, body) if DEBUGLEVEL == 4: for a in DEVELOPERADDR: send_email(sender, a, body) def get_argument(args, argname): if args.has_key(argname): return args[argname] else: return [] def _date_to_tuple(date): return [str(part) for part in (date.year, date.month, date.day)] def get_record_ids(argstr, date_from, date_until): args = parse_qs(argstr) p = get_argument(args, 'p') c = get_argument(args, 'c') cc = get_argument(args, 'cc') as = get_argument(args, 'as') f = get_argument(args, 'f') rg = get_argument(args, 'rg') so = get_argument(args, 'so') sp = get_argument(args, 'sp') ot = get_argument(args, 'ot') as = get_argument(args, 'as') p1 = get_argument(args, 'p1') f1 = get_argument(args, 'f1') m1 = get_argument(args, 
'm1') op1 = get_argument(args, 'op1') p2 = get_argument(args, 'p2') f2 = get_argument(args, 'f2') m2 = get_argument(args, 'm2') op2 = get_argument(args, 'op2') p3 = get_argument(args, 'p3') f3 = get_argument(args, 'f3') m3 = get_argument(args, 'm3') sc = get_argument(args, 'sc') # search = get_argument(args, 'search') d1y, d1m, d1d = _date_to_tuple(date_from) d2y, d2m, d2d = _date_to_tuple(date_until) return perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot, as=as, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y, d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d) def get_argument_as_string(argstr, argname): args = parse_qs(argstr) a = get_argument(args, argname) r = '' if len(a): r = a[0] for i in a[1:len(a)]: r += ", %s" % i return r def get_pattern(argstr): return get_argument_as_string(argstr, 'p') def get_catalogue(argstr): return get_argument_as_string(argstr, 'c') def get_catalogue_num(argstr): args = parse_qs(argstr) a = get_argument(args, 'c') return len(a) def run_query(query, frequency, date_until): """Return a dictionary containing the information of the performed query. The information contains the id of the query, the arguments as a string, and the list of found records.""" if frequency == 'day': date_from = date_until - datetime.timedelta(days=1) elif frequency == 'week': date_from = date_until - datetime.timedelta(weeks=1) else: # Months are not an explicit notion of timedelta (it's the # most ambiguous too). So we explicitely take the same day of # the previous month. 
d, m, y = (date_until.day, date_until.month, date_until.year) m = m - 1 if m == 0: m = 12 y = y - 1 date_from = datetime.date(year=y, month=m, day=d) recs = get_record_ids(query[1], date_from, date_until) n = len(recs) if n: log('query %08s produced %08s records' % (query[0], len(recs))) if DEBUGLEVEL > 2: print "[%s] run query: %s with dates: from=%s, until=%s\n found rec ids: %s" % ( strftime("%c"), query, date_from, date_until, recs) return {'id_query': query[0], 'argstr': query[1], 'records': recs, 'date_from': date_from, 'date_until': date_until} def process_alert_queries(frequency, date): """Run the alerts according to the frequency. Retrieves the queries for which an alert exists, performs it, and processes the corresponding alerts.""" alert_queries = get_alert_queries(frequency) for aq in alert_queries: q = run_query(aq, frequency, date) alerts = get_alerts(q, frequency) process_alerts(alerts) def replace_argument(argstr, argname, argval): """Replace the given date argument value with the new one. If the argument is missing, it is added.""" if search('%s=\d+' % argname, argstr): r = sub('%s=\d+' % argname, '%s=%s' % (argname, argval), argstr) else: r = argstr + '&%s=%s' % (argname, argval) return r def update_arguments(argstr, date_from, date_until): """Replace date arguments in argstr with the ones specified by date_from and date_until. 
Absent arguments are added.""" d1y, d1m, d1d = _date_to_tuple(date_from) d2y, d2m, d2d = _date_to_tuple(date_until) r = replace_argument(argstr, 'd1y', d1y) r = replace_argument(r, 'd1m', d1m) r = replace_argument(r, 'd1d', d1d) r = replace_argument(r, 'd2y', d2y) r = replace_argument(r, 'd2m', d2m) r = replace_argument(r, 'd2d', d2d) return r def log(msg): try: log = open(logdir + '/alertengine.log', 'a') - log.write(strftime('%04Y%02m%02d%02H%02M%02S#')) + log.write(strftime('%Y%m%d%H%M%S#')) log.write(msg + '\n') log.close() except: pass def process_alerts(alerts): # TBD: do not generate the email each time, forge it once and then # send it to all appropriate people for a in alerts['alerts']: if alert_use_basket_p(a): add_records_to_basket(alerts['records'], a[2]) if alert_use_notification_p(a): argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until']) email_notify(a, alerts['records'], argstr) update_date_lastrun(a) def alert_use_basket_p(alert): return alert[2] != 0 def alert_use_notification_p(alert): return alert[6] == 'y' def run_alerts(date): """Run the alerts. First decide which alerts to run according to the current local time, and runs them.""" if date.day == 1: process_alert_queries('month', date) if date.isoweekday() == 1: # first day of the week process_alert_queries('week', date) process_alert_queries('day', date) def process_alert_queries_for_user(uid, date): """Process the alerts for the given user id. 
All alerts are with reference date set as the current local time.""" alert_queries = get_alert_queries_for_user(uid) print alert_queries for aq in alert_queries: frequency = aq[2] q = run_query(aq, frequency, date) alerts = get_alerts(q, frequency) process_alerts(alerts) if __name__ == '__main__': process_alert_queries_for_user(2571422) # erik process_alert_queries_for_user(109) # tibor # process_alert_queries_for_user(11040) # jean-yves diff --git a/modules/websearch/bin/webcoll.in b/modules/websearch/bin/webcoll.in index a81c98c0a..966bdebc7 100644 --- a/modules/websearch/bin/webcoll.in +++ b/modules/websearch/bin/webcoll.in @@ -1,1134 +1,1134 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Creates CDSware collection specific pages, using WML and MySQL configuration tables.""" __version__ = "$Id$" ## import modules: try: import calendar import copy import getopt import getpass import marshal import signal import sys import cgi import sre import os import math import string import urllib import zlib import MySQLdb import Numeric import time import traceback except ImportError, e: print "Error: %s" % e import sys sys.exit(1) try: from cdsware.config import * from cdsware.messages import gettext_set_language, language_list_long from cdsware.search_engine import HitSet, search_pattern, get_creation_date, nice_number, get_field_i18nname from cdsware.search_engine_config import cfg_author_et_al_threshold, cfg_instant_browse, cfg_max_recID, cfg_narrow_search_show_grandsons from cdsware.dbquery import run_sql from cdsware.access_control_engine import acc_authorize_action from cdsware.bibrank_record_sorter import get_bibrank_methods import cdsware.template websearch_templates = cdsware.template.load('websearch') except ImportError, e: print "Error: %s" % e import sys sys.exit(1) ## global vars collection_house = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... options = {} # will hold task options # cfg_cache_last_updated_timestamp_tolerance -- cache timestamp # tolerance (in seconds), to account for the fact that an admin might # accidentally happen to edit the collection definitions at exactly # the same second when some webcoll process was about to be started. # In order to be safe, let's put an exaggerated timestamp tolerance # value such as 20 seconds: cfg_cache_last_updated_timestamp_tolerance = 20 # cfg_cache_last_updated_timestamp_file -- location of the cache # timestamp file: cfg_cache_last_updated_timestamp_file = "%s/collections/last_updated" % cachedir def get_collection(colname): """Return collection object from the collection house for given colname. 
If does not exist, then create it.""" if not collection_house.has_key(colname): colobject = Collection(colname) collection_house[colname] = colobject return collection_house[colname] ## auxiliary functions: def mymkdir(newdir, mode=0777): """works the way a good mkdir should :) - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well """ if os.path.isdir(newdir): pass elif os.path.isfile(newdir): raise OSError("a file with the same name as the desired " \ "dir, '%s', already exists." % newdir) else: head, tail = os.path.split(newdir) if head and not os.path.isdir(head): mymkdir(head, mode) if tail: os.umask(022) os.mkdir(newdir, mode) def escape_string(s): "Escapes special chars in string. For MySQL queries." s = MySQLdb.escape_string(s) return s def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return " selected" else: return "" def write_message(msg, stream=sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out def print_record(recID, format='hb', ln=cdslang): "Prints record 'recID' formatted accoding to 'format'." 
out = "" # HTML brief format by default query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % zlib.decompress(res[0][0]) else: # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_field(recID, "245__a") # secondly, authors: authors = get_field(recID, "100__a") + get_field(recID, "700__a") # thirdly, date of creation: dates = get_field(recID, "260__c") # thirdly bis, report numbers: rns = get_field(recID, "037__a") + get_field(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_field(recID, "520__a") # fifthly, fulltext link: urls_z = get_field(recID, "8564_z") urls_u = get_field(recID, "8564_u") out += websearch_templates.tmpl_record_body( weburl = weburl, titles = titles, authors = authors, dates = dates, rns = rns, abstracts = abstracts, urls_u = urls_u, urls_z = urls_z ) # at the end of HTML mode, print "Detailed record" and "Mark record" functions: out += websearch_templates.tmpl_record_links( weburl = weburl, recid = recID, ln = ln ) return out class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the MySQL configuration database about 'name'." 
self.calculate_reclist_run_already = 0 # to speed things up wihtout much refactoring self.update_reclist_run_already = 0 # to speed things up wihtout much refactoring self.reclist_with_nonpublic_subcolls = HitSet() if not name: self.name = cdsname # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None self.reclist = HitSet() else: self.name = name query = "SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name='%s'" % escape_string(name) try: res = run_sql(query, None, 1) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: self.reclist = HitSet(Numeric.loads(zlib.decompress(res[0][5]))) except: self.reclist = HitSet() else: # collection does not exist! self.id = None self.dbquery = None self.nbrecs = None self.reclist = HitSet() except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_name(self, ln=cdslang, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) ancestors.append(col_ancestor) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. 
Return the contect of the `restrited' column of the collection table (typically Apache group). Otherwise return None if the collection is public.""" out = None query = "SELECT restricted FROM collection WHERE id=%d" % self.id res = run_sql(query, None, 1) try: out = res[0][0] except: pass return out def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) descendants.append(col_desc) descendants += col_desc.get_descendants() return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache." # open file: dirname = "%s/collections/%d" % (cachedir, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: if options["verbose"] >= 6: write_message("... 
creating %s" % fullfilename) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## do this for each language: for lang, lang_fullname in language_list_long(): # load the right message language _ = gettext_set_language(lang) ## first, update navtrail: for as in range(0,2): self.write_cache_file("navtrail-as=%s-ln=%s" % (as, lang), self.create_navtrail_links(as, lang)) ## second, update page body: for as in range(0,2): # do both simple search and advanced search pages: body = websearch_templates.tmpl_webcoll_body( weburl = weburl, te_portalbox = self.create_portalbox(lang, 'te'), searchfor = self.create_searchfor(as, lang), np_portalbox = self.create_portalbox(lang, 'np'), narrowsearch = self.create_narrowsearch(as, lang, _("Narrow by collection:")), focuson = self.create_narrowsearch(as, lang, _("Focus on:"), "v"), ne_portalbox = self.create_portalbox(lang, 'ne') ) self.write_cache_file("body-as=%s-ln=%s" % (as, lang), body) ## third, write portalboxes: self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) ## fourth, write 'last updated' information: - self.write_cache_file("last-updated-ln=%s" % lang, time.strftime("%02d %b %04Y %02H:%02M:%02S %Z", time.localtime())) + self.write_cache_file("last-updated-ln=%s" % lang, time.strftime("%d %b %Y %H:%M:%S %Z", time.localtime())) return def create_navtrail_links(self, \ as=0, ln=cdslang, separator=" > "): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). 
If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in self.get_ancestors(): if dad.name != cdsname: # exclude Home collection dads.append ((dad.name, dad.get_name(ln))) return websearch_templates.tmpl_navtrail_links( as = as, ln = ln, weburl = weburl, separator = separator, dads = dads) def create_nbrecs_info(self, ln=cdslang, prolog = None, epilog = None): "Return information on the number of records." return websearch_templates.tmpl_nbrecs_info (number = nice_number (self.nbrecs, ln), prolog = prolog, epilog = epilog) def create_portalbox(self, lang=cdslang, position="rt"): """Creates portalboxes of language CDSLANG of the position POSITION by consulting MySQL configuration database. The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += websearch_templates.tmpl_portalbox(title = title, body = body) else: # no title specified, so print body ``as is'' only: out += body return out def create_narrowsearch(self, as=0, ln=cdslang, title="Narrow search", type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. 
Suitable for 'Narrow search' and 'Focus on' boxes.""" # get list of sons and analyse it sons = self.get_sons(type) # return nothing for type 'v' (virtual collection) if there are no sons: if type == 'v' and not sons: return "" # load instant browse parts, in case no descendants if not len(sons) and type == 'r': instant_browse = self.create_instant_browse(ln=ln) else: instant_browse = '' # get descendents descendants = self.get_descendants(type) grandsons = [] if cfg_narrow_search_show_grandsons: # load grandsons for each son for son in sons: grandsons.append(son.get_sons()) # return "" return websearch_templates.tmpl_narrowsearch( as = as, ln = ln, weburl = weburl, type = type, father = self, has_grandchildren = len(descendants)>len(sons), title = title, instant_browse = instant_browse, sons = sons, display_grandsons = cfg_narrow_search_show_grandsons, grandsons = grandsons ) def create_instant_browse(self, rg=cfg_instant_browse, ln=cdslang): "Searches database and produces list of last 'rg' records." box = "" if self.restricted_p(): return websearch_templates.tmpl_box_restricted_content(ln = ln) else: url = "%s/search.py?cc=%s&jrec=%d" % (weburl, urllib.quote_plus(self.name), rg+1) if self.nbrecs and self.reclist: # firstly, get last 'rg' records: recIDs = Numeric.nonzero(self.reclist._set) passIDs = [] for idx in range(self.nbrecs-1, self.nbrecs-rg-1, -1): if idx>=0: passIDs.append({'id' : recIDs[idx], 'body' : print_record(recIDs[idx], ln=ln) }) if not (self.nbrecs > rg): url = "" recdates = [] for recid in passIDs: recid['date'] = get_creation_date(recid['id'], fmt="%Y-%m-%d
%H:%i") return websearch_templates.tmpl_instant_browse( ln = ln, recids = passIDs, more_link = url ) else: return websearch_templates.tmpl_box_no_records(ln = ln) return box def create_searchoptions(self): "Produces 'Search options' portal box." box="" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: values = [{'value' : '', 'text' : 'any' + field_name}] # @todo internationalisation of "any" for row_bis in res_bis: values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]}) box += websearch_templates.tmpl_select( fieldname = field_code, values = values ) return box def create_sortoptions(self, ln=cdslang): "Produces 'Sort options' portal box." 
# load the right message language _ = gettext_set_language(ln) box="" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id values = [{'value' : '', 'text': "- %s -" % _("latest first")}] res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: for tmp in ('title', 'author', 'report number', 'year'): values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) box += websearch_templates.tmpl_select( fieldname = 'so', css_class = 'address', values = [ {'value' : 'a' , 'text' : _("asc.")}, {'value' : 'd' , 'text' : _("desc.")} ] ) return box def create_rankoptions(self, ln=cdslang): "Produces 'Rank options' portal box." # load the right message language _ = gettext_set_language(ln) values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}] for (code,name) in get_bibrank_methods(self.id, ln): values.append({'value' : code, 'text': name}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) return box def create_displayoptions(self, ln=cdslang): "Produces 'Display options' portal box." # load the right message language _ = gettext_set_language(ln) values = [] for i in ['10', '25', '50', '100', '250', '500']: values.append({'value' : i, 'text' : i + ' ' + _("results")}) box = websearch_templates.tmpl_select( fieldname = 'rg', css_class = 'address', values = values ) if self.get_sons(): box += websearch_templates.tmpl_select( fieldname = 'sc', css_class = 'address', values = [ {'value' : '1' , 'text' : _("split by collection")}, {'value' : '0' , 'text' : _("single list")} ] ) return box def create_formatoptions(self, ln=cdslang): "Produces 'Output format options' portal box." 
# load the right message language _ = gettext_set_language(ln) box = "" values = [] query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf WHERE cf.id_collection=%d AND cf.id_format=f.id ORDER BY cf.score DESC, f.name ASC""" % self.id res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")}) box = websearch_templates.tmpl_select( fieldname = 'of', css_class = 'address', values = values ) return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): "Produces 'search within' selection box for the current collection." # get values query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id res = run_sql(query) values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}] if res: for row in res: values.append({'value' : row[0], 'text' : row[1]}) else: if cfg_cern_site: for tmp in ['title', 'author', 'abstract', 'report number', 'year']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) else: for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'year', 'fulltext', 'reference']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) return websearch_templates.tmpl_searchwithin_select( fieldname = fieldname, ln = ln, selected = value, values = values ) def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out def create_searchfor(self, as=0, ln=cdslang): "Produces either Simple or Advanced 'Search for' box for the current collection." 
if as == 1: return self.create_searchfor_advanced(ln) else: return self.create_searchfor_simple(ln) def create_searchfor_simple(self, ln=cdslang): "Produces simple 'Search for' box for the current collection." # load the right message language _ = gettext_set_language(ln) if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln return websearch_templates.tmpl_searchfor_simple( ln = ln, weburl = weburl, asearchurl = asearchurl, header = _("Search %s records for:") % self.create_nbrecs_info(ln, "",""), middle_option = self.create_searchwithin_selection_box(ln=ln), ) def create_searchfor_advanced(self, ln=cdslang): "Produces advanced 'Search for' box for the current collection." # load the right message language _ = gettext_set_language(ln) if self.name != cdsname: ssearchurl = "?c=%s&as=0&ln=%s" % (urllib.quote_plus(self.name), ln) asearchurl = "?c=%s&as=1&ln=%s" % (urllib.quote_plus(self.name), ln) else: # hide cdsname for aesthetical reasons ssearchurl = "?as=0&ln=%s" % ln asearchurl = "?as=1&ln=%s" % ln return websearch_templates.tmpl_searchfor_advanced( ln = ln, weburl = weburl, ssearchurl = ssearchurl, header = _("Search %s records for:") % self.create_nbrecs_info(ln, "",""), middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln), middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln), middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln), searchoptions = self.create_searchoptions(), sortoptions = self.create_sortoptions(ln), rankoptions = self.create_rankoptions(ln), displayoptions = self.create_displayoptions(ln), formatoptions = self.create_formatoptions(ln) ) def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if 
self.calculate_reclist_run_already: # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) if options["verbose"] >= 6: write_message("... calculating reclist of %s" % self.name) reclist = HitSet() # will hold results for public sons only; good for storing into DB reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union(coll_reclist) reclist_with_nonpublic_subcolls.union(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: reclist = search_pattern(None,self.dbquery) reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # deduce the number of records: reclist.calculate_nbhits() reclist_with_nonpublic_subcolls.calculate_nbhits() # store the results: self.nbrecs = reclist_with_nonpublic_subcolls._nbhits self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 if options["verbose"] >= 6: write_message("... 
updating reclist of %s (%s recs)" % (self.name, self.nbrecs)) sys.stdout.flush() try: query = "UPDATE collection SET nbrecs=%d, reclist='%s' WHERE id=%d" % \ (self.nbrecs, escape_string(zlib.compress(Numeric.dumps(self.reclist._set))), self.id) res = run_sql(query) self.reclist_updated_since_start = 1 except MySQLdb.Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re=sre.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def get_current_time_timestamp(): """Return timestamp corresponding to the current time.""" - return time.strftime("%04Y-%02m-%02d %02H:%02M:%02S", time.localtime()) + return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) def compare_timestamps_with_tolerance(timestamp1, timestamp2, tolerance=0): """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 minus TOLERANCE, 0 if they are equal within TOLERANCE limit, and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE. 
""" # remove any trailing .00 in timestamps: timestamp1 = sre.sub(r'\.[0-9]+$', '', timestamp1) timestamp2 = sre.sub(r'\.[0-9]+$', '', timestamp2) # first convert timestamps to Unix epoch seconds: timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) # now compare them: if timestamp1_seconds < timestamp2_seconds - tolerance: return -1 elif timestamp1_seconds > timestamp2_seconds + tolerance: return 1 else: return 0 def get_database_last_updated_timestamp(): """Return last updated timestamp for collection-related and record-related database tables. """ database_tables_timestamps = [] database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'bibrec'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'bibfmt'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'idxWORD%%'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'collection%%'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'portalbox'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'field%%'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'format%%'"))) database_tables_timestamps.extend(map(lambda x: str(x[11]), run_sql("SHOW TABLE STATUS LIKE 'rnkMETHODNAME'"))) return max(database_tables_timestamps) def get_cache_last_updated_timestamp(): """Return last updated cache timestamp.""" try: f = open(cfg_cache_last_updated_timestamp_file, "r") except: return "1970-01-01 00:00:00" timestamp = f.read() f.close() return timestamp def set_cache_last_updated_timestamp(timestamp): """Set last updated cache timestamp to TIMESTAMP.""" try: f = open(cfg_cache_last_updated_timestamp_file, "w") 
# (tail of set_cache_last_updated_timestamp)
# NOTE(review): if the open() above failed, this bare except swallows the
# error and the following f.write() raises NameError -- presumably a
# writable cache directory is guaranteed by the installation; verify.
    except:
        pass
    f.write(timestamp)
    f.close()
    return timestamp

def write_message(msg, stream=sys.stdout):
    """Prints message and flush output stream (may be sys.stdout or sys.stderr)."""
    if stream == sys.stdout or stream == sys.stderr:
        # prefix every message with a "YYYY-MM-DD HH:MM:SS --> " timestamp:
        stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
        stream.write("%s\n" % msg)
        stream.flush()
    else:
        sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)

def task_sig_sleep(sig, frame):
    """Signal handler for the 'sleep' signal sent by BibSched."""
    if options["verbose"] >= 9:
        write_message("got signal %d" % sig)
    write_message("sleeping...")
    task_update_status("SLEEPING")
    signal.pause() # wait for wake-up signal

def task_sig_wakeup(sig, frame):
    """Signal handler for the 'wakeup' signal sent by BibSched."""
    if options["verbose"] >= 9:
        write_message("got signal %d" % sig)
    write_message("continuing...")
    task_update_status("CONTINUING")

def task_sig_stop(sig, frame):
    """Signal handler for the 'stop' signal sent by BibSched."""
    if options["verbose"] >= 9:
        write_message("got signal %d" % sig)
    write_message("stopping...")
    task_update_status("STOPPING")
    pass # FIXME: is there anything to be done?
    task_update_status("STOPPED")
    sys.exit(0)

def task_sig_suicide(sig, frame):
    """Signal handler for the 'suicide' signal sent by BibSched."""
    if options["verbose"] >= 9:
        write_message("got signal %d" % sig)
    write_message("suiciding myself now...")
    task_update_status("SUICIDING")
    write_message("suicided")
    task_update_status("SUICIDED")
    sys.exit(0)

def task_sig_unknown(sig, frame):
    """Signal handler for the other unknown signals sent by shell or user."""
    # do nothing for other signals
    write_message("unknown signal %d ignored" % sig)

def authenticate(user, header="WebColl Task Submission", action="runwebcoll"):
    """Authenticate the user against the user database.
       Check for its password, if it exists.
       Check for action access rights.
Return user name upon authorization success,
       do system exit upon authorization failure.
    """
    print header
    print "=" * len(header)
    if user == "":
        # no username given on the command line: prompt for it interactively
        print >> sys.stdout, "\rUsername: ",
        user = string.strip(string.lower(sys.stdin.readline()))
    else:
        print >> sys.stdout, "\rUsername: ", user
    ## first check user pw:
    res = run_sql("select id,password from user where email=%s", (user,), 1)
    if not res:
        print "Sorry, %s does not exist." % user
        sys.exit(1)
    else:
        (uid_db, password_db) = res[0]
        if password_db:
            # account has a password set: ask for it and compare
            password_entered = getpass.getpass()
            if password_db == password_entered:
                pass
            else:
                print "Sorry, wrong credentials for %s." % user
                sys.exit(1)
        ## secondly check authorization for the action:
        (auth_code, auth_message) = acc_authorize_action(uid_db, action)
        if auth_code != 0:
            print auth_message
            sys.exit(1)
    return user

def task_submit(options):
    """Submits task to the BibSched task queue.
       This is what people will be invoking via command line."""
    ## sanity check: remove eventual "task" option:
    if options.has_key("task"):
        del options["task"]
    ## authenticate user:
    user = authenticate(options.get("user", ""))
    ## submit task:
    if options["verbose"] >= 9:
        print ""
        write_message("storing task options %s\n" % options)
    # options are marshalled into the 'arguments' column:
    task_id = run_sql("""INSERT INTO schTASK (id,proc,user,runtime,sleeptime,status,arguments)
                         VALUES (NULL,'webcoll',%s,%s,%s,'WAITING',%s)""",
                      (user, options["runtime"], options["sleeptime"], marshal.dumps(options)))
    ## update task number:
    options["task"] = task_id
    run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options),task_id))
    write_message("Task #%d submitted."
% task_id)
    return task_id

def task_update_progress(msg):
    """Updates progress information in the BibSched task table."""
    global task_id
    return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, task_id))

def task_update_status(val):
    """Updates status information in the BibSched task table."""
    global task_id
    return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, task_id))

def task_read_status(task_id):
    """Read status information in the BibSched task table."""
    res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1)
    try:
        out = res[0][0]
    except:
        # task row missing or empty -> report unknown status
        out = 'UNKNOWN'
    return out

def task_get_options(id):
    """Returns options for the task 'id' read from the BibSched task queue table."""
    out = {}
    res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='webcoll'", (id,))
    try:
        out = marshal.loads(res[0][0])
    except:
        write_message("Error: WebColl task %d does not seem to exist." % id)
        sys.exit(1)
    return out

def task_run():
    """Run the WebColl task by fetching arguments from the BibSched task queue.
       This is what BibSched will be invoking via daemon call.
       The task will update collection reclist cache and collection web pages
       for given collection. (default is all).
       Arguments described in usage() function.
       Return 1 in case of success and 0 in case of failure."""
    global task_id, options
    task_run_start_timestamp = get_current_time_timestamp()
    options = task_get_options(task_id) # get options from BibSched task table
    ## check task id:
    if not options.has_key("task"):
        write_message("Error: The task #%d does not seem to be a WebColl task." % task_id)
        return 0
    ## check task status:
    task_status = task_read_status(task_id)
    if task_status != "WAITING":
        write_message("Error: The task #%d is %s.  I expected WAITING." % (task_id, task_status))
        return 0
    ## we can run the task now:
    if options["verbose"]:
        write_message("Task #%d started."
% task_id)
    task_update_status("RUNNING")
    ## initialize signal handler:
    signal.signal(signal.SIGUSR1, task_sig_sleep)
    signal.signal(signal.SIGTERM, task_sig_stop)
    signal.signal(signal.SIGABRT, task_sig_suicide)
    signal.signal(signal.SIGCONT, task_sig_wakeup)
    signal.signal(signal.SIGINT, task_sig_unknown)
    colls = []
    # decide whether we need to run or not, by comparing last updated timestamps:
    if options["verbose"] >= 3:
        write_message("Database timestamp is %s." % get_database_last_updated_timestamp())
        write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp())
    if options.has_key("force") or \
       compare_timestamps_with_tolerance(get_database_last_updated_timestamp(),
                                         get_cache_last_updated_timestamp(),
                                         cfg_cache_last_updated_timestamp_tolerance) >= 0:
        ## either forced update was requested or cache is not up to date, so recreate it:
        # firstly, decide which collections to do:
        if options.has_key("collection"):
            coll = get_collection(options["collection"])
            if coll.id == None:
                usage(1, 'Collection %s does not exist' % coll.name)
            colls.append(coll)
        else:
            res = run_sql("SELECT name FROM collection ORDER BY id")
            for row in res:
                colls.append(get_collection(row[0]))
        # secondly, update collection reclist cache:
        i = 0
        for coll in colls:
            i += 1
            if options["verbose"]:
                write_message("%s / reclist cache update" % coll.name)
            coll.calculate_reclist()
            coll.update_reclist()
            task_update_progress("Part 1/2: done %d/%d" % (i,len(colls)))
        # thirdly, update collection webpage cache:
        i = 0
        for coll in colls:
            i += 1
            if options["verbose"]:
                write_message("%s / web cache update" % coll.name)
            coll.update_webpage_cache()
            task_update_progress("Part 2/2: done %d/%d" % (i,len(colls)))
        # finally update the cache last updated timestamp:
        # (but only when all collections were updated, not when only
        # some of them were forced-updated as per admin's demand)
        if not options.has_key("collection"):
            set_cache_last_updated_timestamp(task_run_start_timestamp)
            if options["verbose"] >= 3:
                write_message("Collection cache timestamp is set to %s." % get_cache_last_updated_timestamp())
    else:
        ## cache up to date, we don't have to run
        if options["verbose"]:
            write_message("Collection cache is up to date, no need to run.")
        pass
    ## we are done:
    task_update_progress("Done.")
    task_update_status("DONE")
    if options["verbose"]:
        write_message("Task #%d finished." % task_id)
    return 1

def usage(exitcode=1, msg=""):
    """Prints usage info."""
    if msg:
        sys.stderr.write("Error: %s.\n" % msg)
    sys.stderr.write("Usage: %s [options]\n" % sys.argv[0])
    sys.stderr.write("Command options:\n")
    sys.stderr.write(" -c, --collection\t Update cache for the given collection only. [all]\n")
    sys.stderr.write(" -f, --force\t Force update even if cache is up to date. [no]\n")
    sys.stderr.write("Scheduling options:\n")
    sys.stderr.write(" -u, --user=USER \t User name to submit the task as, password needed.\n")
    sys.stderr.write(" -t, --runtime=TIME \t Time to execute the task (now), e.g.: +15s, 5m, 3h, 2002-10-27 13:57:26\n")
    sys.stderr.write(" -s, --sleeptime=SLEEP \t Sleeping frequency after which to repeat task (no), e.g.: 30m, 2h, 1d\n")
    sys.stderr.write("General options:\n")
    sys.stderr.write(" -h, --help \t\t Print this help.\n")
    sys.stderr.write(" -V, --version \t\t Print version information.\n")
    sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (from 0 to 9, default 1).\n")
    sys.stderr.write("""Description: %s updates the collection cache (record universe for a given collection plus web page elements) based on WML and MySQL configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""" % sys.argv[0])
    sys.exit(exitcode)

def main():
    """Main function that analyzes command line input and calls whatever is appropriate.
Useful for learning on how to write BibSched tasks.""" global task_id ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) try: if not task_run(): write_message("Error occurred. Exiting.", sys.stderr) except StandardError, e: write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) traceback.print_tb(sys.exc_info()[2]) write_message("Exiting.", sys.stderr) task_update_status("ERROR") else: ## B - submit the task # set default values: options["runtime"] = time.strftime("%Y-%m-%d %H:%M:%S") options["verbose"] = 1 options["sleeptime"] = "" # set user-defined options: try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:s:t:c:f", ["help", "version", "verbose=","user=","sleep=","time=","collection=","force"]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __version__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in [ "-s", "--sleeptime" ]: get_datetime(opt[1]) # see if it is a valid shift options["sleeptime"] = opt[1] elif opt[0] in [ "-t", "--runtime" ]: options["runtime"] = get_datetime(opt[1]) elif opt[0] in [ "-c", "--collection"]: options["collection"] = opt[1] elif opt[0] in [ "-f", "--force"]: options["force"] = 1 else: usage(1) except StandardError, e: usage(e) task_submit(options) return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py index b1412ef13..0df997e53 100644 --- a/modules/websearch/lib/search_engine.py +++ b/modules/websearch/lib/search_engine.py @@ -1,3505 +1,3505 @@ # -*- coding: utf-8 -*- ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. 
## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """CDSware Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __version__ = "$Id$" ## import general modules: import cgi import copy import Cookie import cPickle import marshal import fileinput import getopt import string from string import split import os import sre import sys import time import traceback import urllib import zlib import MySQLdb import Numeric import md5 import base64 import unicodedata from xml.dom import minidom ## import CDSware stuff: from cdsware.config import * from cdsware.search_engine_config import * from cdsware.bibrank_record_sorter import get_bibrank_methods,rank_records from cdsware.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list if cfg_experimental_features: from cdsware.bibrank_citation_searcher import calculate_cited_by_list, calculate_co_cited_with_list from cdsware.bibrank_citation_grapher import create_citation_history_graph_and_box from cdsware.bibrank_downloads_grapher import create_download_history_graph_and_box from cdsware.dbquery import run_sql try: from mod_python import apache from cdsware.webuser import getUid from cdsware.webpage import pageheaderonly, pagefooteronly, create_error_box except ImportError, e: pass # ignore user personalisation, needed e.g. 
for command-line from cdsware.messages import gettext_set_language, wash_language try: import cdsware.template websearch_templates = cdsware.template.load('websearch') except: pass ## global vars: search_cache = {} # will cache results of previous searches cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? ## precompile some often-used regexp for speed reasons: sre_word = sre.compile('[\s]') sre_quotes = sre.compile('[\'\"]') sre_doublequote = sre.compile('\"') sre_equal = sre.compile('\=') sre_logical_and = sre.compile('\sand\s', sre.I) sre_logical_or = sre.compile('\sor\s', sre.I) sre_logical_not = sre.compile('\snot\s', sre.I) sre_operators = sre.compile(r'\s([\+\-\|])\s') sre_pattern_wildcards_at_beginning = sre.compile(r'(\s)[\*\%]+') sre_pattern_single_quotes = sre.compile("'(.*?)'") sre_pattern_double_quotes = sre.compile("\"(.*?)\"") sre_pattern_regexp_quotes = sre.compile("\/(.*?)\/") sre_pattern_short_words = sre.compile(r'([\s\"]\w{1,3})[\*\%]+') sre_pattern_space = sre.compile("__SPACE__") sre_pattern_today = sre.compile("\$TODAY\$") sre_unicode_lowercase_a = sre.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) sre_unicode_lowercase_ae = sre.compile(unicode(r"(?u)[æ]", "utf-8")) sre_unicode_lowercase_e = sre.compile(unicode(r"(?u)[éèëê]", "utf-8")) sre_unicode_lowercase_i = sre.compile(unicode(r"(?u)[íìïî]", "utf-8")) sre_unicode_lowercase_o = sre.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) sre_unicode_lowercase_u = sre.compile(unicode(r"(?u)[úùüû]", "utf-8")) sre_unicode_lowercase_y = sre.compile(unicode(r"(?u)[ýÿ]", "utf-8")) sre_unicode_lowercase_c = sre.compile(unicode(r"(?u)[çć]", "utf-8")) sre_unicode_lowercase_n = sre.compile(unicode(r"(?u)[ñ]", "utf-8")) sre_unicode_uppercase_a = sre.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) sre_unicode_uppercase_ae = sre.compile(unicode(r"(?u)[Æ]", "utf-8")) 
sre_unicode_uppercase_e = sre.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8"))
sre_unicode_uppercase_i = sre.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8"))
sre_unicode_uppercase_o = sre.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8"))
sre_unicode_uppercase_u = sre.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8"))
sre_unicode_uppercase_y = sre.compile(unicode(r"(?u)[Ý]", "utf-8"))
sre_unicode_uppercase_c = sre.compile(unicode(r"(?u)[ÇĆ]", "utf-8"))
sre_unicode_uppercase_n = sre.compile(unicode(r"(?u)[Ñ]", "utf-8"))

def get_alphabetically_ordered_collection_list(collid=1, level=0):
    """Returns nicely ordered (score respected) list of collections,
       more exactly list of tuples (collection name, printable collection name).
       Suitable for create_search_box()."""
    # NOTE(review): 'collid' is unused here and 'c_id' is never read;
    # presumably kept for signature parity with
    # get_nicely_ordered_collection_list() -- confirm before cleaning up.
    out = []
    query = "SELECT id,name FROM collection ORDER BY name ASC"
    res = run_sql(query)
    for c_id, c_name in res:
        # make a nice printable name (e.g. truncate c_printable for for long collection names):
        if len(c_name)>30:
            c_printable = c_name[:30] + "..."
        else:
            c_printable = c_name
        if level:
            c_printable = " " + level * '-' + " " + c_printable
        out.append([c_name, c_printable])
    return out

def get_nicely_ordered_collection_list(collid=1, level=0):
    """Returns nicely ordered (score respected) list of collections,
       more exactly list of tuples (collection name, printable collection name).
       Suitable for create_search_box()."""
    colls_nicely_ordered = []
    query = "SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c "\
            " WHERE c.id=cc.id_son AND cc.id_dad='%s' ORDER BY score DESC" % collid
    res = run_sql(query)
    for c, cid in res:
        # make a nice printable name (e.g. truncate c_printable for for long collection names):
        if len(c)>30:
            c_printable = c[:30] + "..."
        else:
            c_printable = c
        if level:
            c_printable = " " + level * '-' + " " + c_printable
        colls_nicely_ordered.append([c, c_printable])
        # recurse into the sons of this collection, one indent level deeper:
        colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1)
    return colls_nicely_ordered

def get_index_id(field):
    """Returns first index id where the field code FIELD is indexed.
       Returns zero in case there is no table for this index.
       Example: field='author', output=4."""
    out = 0
    query = """SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code='%s' AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""" % MySQLdb.escape_string(field)
    res = run_sql(query, None, 1)
    if res:
        out = res[0][0]
    return out

def get_words_from_pattern(pattern):
    "Returns list of whitespace-separated words from pattern."
    # dictionary used as a set to deduplicate words:
    words = {}
    for word in split(pattern):
        if not words.has_key(word):
            words[word] = 1;
    return words.keys()

def create_basic_search_units(req, p, f, m=None, of='hb'):
    """Splits search pattern and search field into a list of independently searchable units.
       - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where
         'operator' is set union (|), set intersection (+) or set exclusion (-);
         'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
         'field' is either a code like 'title' or MARC tag like '100__a';
         'type' is the search type ('w' for word file search, 'a' for access file search).
       - Optionally, the function accepts the match type argument 'm'.
         If it is set (e.g. from advanced search interface), then it performs this kind of matching.
         If it is not set, then a guess is made.
         'm' can have values: 'a'='all of the words', 'o'='any of the words',
         'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'.
       - Warnings are printed on req (when not None) in case of HTML output formats."""
    opfts = [] # will hold (o,p,f,t,h) units
    ## check arguments: if matching type phrase/string/regexp, do we have field defined?
if (m=='p' or m=='r' or m=='e') and not f: m = 'a' if of.startswith("h"): print_warning(req, "This matching type cannot be used within any field. I will perform a word search instead." ) print_warning(req, "If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose within title search option.") ## is desired matching type set? if m: ## A - matching type is known; good! if m == 'e': # A1 - exact value: opfts.append(['+',p,f,'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+',"%"+p+"%",f,'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+',p,f,'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+',word,f,'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+',word,f,'w']) # '+' in the first unit else: opfts.append(['|',word,f,'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet." % m, "Warning") opfts.append(['+',"%"+p+"%",f,'a']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0]=='"' and p[-1]=='"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+',p[1:-1],f,'a']) elif f and p[0]=="'" and p[-1]=="'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search opfts.append(['+','%'+p[1:-1]+'%',f,'a']) elif f and p[0]=="/" and p[-1]=="/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? 
=> doing regexp search opfts.append(['+',p[1:-1],f,'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+',p,f,'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and starts by two digits? => doing ACC search opfts.append(['+',p,f,'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = sre_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = sre_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = sre_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # wash argument: p = sre_equal.sub(":", p) p = sre_logical_and.sub(" ", p) p = sre_logical_or.sub(" |", p) p = sre_logical_not.sub(" -", p) p = sre_operators.sub(r' \1', p) for pi in split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = sre_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... 
# secondly, determine search pattern and field: if string.find(pi, ":") > 0: fi, pi = split(pi, ":", 1) else: fi, pi = f, pi # look also for old ALEPH field names: if fi and cfg_fields_convert.has_key(string.lower(fi)): fi = cfg_fields_convert[string.lower(fi)] # wash 'pi' argument: if sre_quotes.match(pi): # B3a - quotes are found => do ACC search (phrase search) if fi: if pi[0] == '"' and pi[-1] == '"': pi = string.replace(pi, '"', '') # remove quote signs opfts.append([oi,pi,fi,'a']) elif pi[0] == "'" and pi[-1] == "'": pi = string.replace(pi, "'", "") # remove quote signs opfts.append([oi,"%"+pi+"%",fi,'a']) else: # unbalanced quotes, so do WRD query: opfts.append([oi,pi,fi,'w']) else: # fi is not defined, look at where we are doing exact or subphrase search (single/double quotes): if pi[0]=='"' and pi[-1]=='"': opfts.append([oi,pi[1:-1],"anyfield",'a']) if of.startswith("h"): print_warning(req, "Searching for an exact match inside any field may be slow. You may want to search for words instead, or choose to search within specific field.") else: # nope, subphrase in global index is not possible => change back to WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): # since there may be '-' and other chars that we do not index in WRD opfts.append([oi,pii,fi,'w']) if of.startswith("h"): print_warning(req, "The partial phrase search does not work in any field. I'll do a boolean AND searching instead.") print_warning(req, "If you want to do a partial phrase search in a specific field, e.g. 
inside title, then please choose 'within title' search option.", "Tip") print_warning(req, "If you want to do exact phrase matching, then please use double quotes.", "Tip") elif fi and str(fi[0]).isdigit() and str(fi[0]).isdigit(): # B3b - fi exists and starts by two digits => do ACC search opfts.append([oi,pi,fi,'a']) elif fi and not get_index_id(fi): # B3c - fi exists but there is no words table for fi => try ACC search opfts.append([oi,pi,fi,'a']) elif fi and pi.startswith('/') and pi.endswith('/'): # B3d - fi exists and slashes found => try regexp search opfts.append([oi,pi[1:-1],fi,'r']) else: # B3e - general case => do WRD search pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed for pii in get_words_from_pattern(pi): opfts.append([oi,pii,fi,'w']) ## sanity check: for i in range(0,len(opfts)): try: pi = opfts[i][1] if pi == '*': if of.startswith("h"): print_warning(req, "Ignoring standalone wildcard word.", "Warning") del opfts[i] if pi == '' or pi == ' ': fi = opfts[i][2] if fi: if of.startswith("h"): print_warning(req, "Ignoring empty %s search term." % fi, "Warning") del opfts[i] except: pass ## return search units: return opfts def page_start(req, of, cc, as, ln, uid, title_message=None): "Start page according to given output format." 
_ = gettext_set_language(ln) if not title_message: title_message = _("Search Results") if not req: return # we were called from CLI if of.startswith('x'): # we are doing XML output: req.content_type = "text/xml" req.send_http_header() req.write("""\n""") if of.startswith("xm"): req.write("""\n""") else: req.write("""\n""") elif of.startswith('t') or str(of[0:3]).isdigit(): # we are doing plain text output: req.content_type = "text/plain" req.send_http_header() elif of == "id": pass # nothing to do, we shall only return list of recIDs else: # we are doing HTML output: req.content_type = "text/html" req.send_http_header() req.write(pageheaderonly(title=title_message, navtrail=create_navtrail_links(cc, as, ln, 1), description="%s %s." % (cc, _("Search Results")), keywords="CDSware, WebSearch, %s" % cc, uid=uid, language=ln, urlargs=req.args)) req.write(websearch_templates.tmpl_search_pagestart(ln = ln)) def page_end(req, of="hb", ln=cdslang): "End page according to given output format: e.g. close XML tags, add HTML footer, etc." if of == "id": return [] # empty recID list if not req: return # we were called from CLI if of.startswith('h'): req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, urlargs=req.args)) elif of.startswith('x'): req.write("""\n""") return "\n" def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=cdslang): "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options." _ = gettext_set_language(ln) box = "" # day box += """""" # month box += """""" # year box += """""" return box def create_google_box(cc, p, f, p1, p2, p3, ln=cdslang, prolog_start="""
""", prolog_end="""
""", column_separator="""""", link_separator= """
""", epilog="""
"""): "Creates the box that proposes links to other useful search engines like Google. 'p' is the search pattern." if not p and (p1 or p2 or p3): p = p1 + " " + p2 + " " + p3 # check suitable p's whether we want to print it if cfg_google_box == 0 or \ p == "" or \ string.find(p, "recid:")>=0 or \ string.find(p, "sysno:")>=0 or \ string.find(p, "sysnos:")>=0: return "" # remove our logical field index names: p = sre.sub(r'\w+:', '', p) return websearch_templates.tmpl_google_box( ln = ln, cc = cc, p = p, f = f, prolog_start = prolog_start, prolog_end = prolog_end, column_separator = column_separator, link_separator = link_separator, epilog = epilog, ) def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, action=""): "Create search box for 'search again in the results page' functionality." # load the right message language _ = gettext_set_language(ln) # some computations if cc == cdsname: cc_intl = cdsnameintl[ln] else: cc_intl = get_coll_i18nname(cc, ln) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list() else: colls_nicely_ordered = get_alphabetically_ordered_collection_list() colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != cdsname: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' : '', 'text' : '*** %s ***' % _("any collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == sre.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % 
_("add another collection") }] + colls_nice) else: # we searched in CDSNAME, so print 'any collection' heading coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("any collection") }] + colls_nice) sort_formats = [{ 'value' : '', 'text' : _("latest first") }] query = """SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" res = run_sql(query) for code, name in res: sort_formats.append({ 'value' : code, 'text' : name, }) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code,name) in get_bibrank_methods(get_colID(cc), ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = [] query = """SELECT code,name FROM format ORDER BY name ASC""" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : _("HTML brief") }) return websearch_templates.tmpl_search_box( ln = ln, weburl = weburl, as = as, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln = ln), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, sort_formats = sort_formats, sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl ) def create_navtrail_links(cc=cdsname, as=0, ln=cdslang, self_p=1, separator=" > "): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. 
""" dads = [] for dad in get_coll_ancestors(cc): if dad != cdsname: # exclude Home collection dads.append ((dad, get_coll_i18nname (dad, ln))) if self_p and cc != cdsname: dads.append ((cc, get_coll_i18nname(cc, ln))) return websearch_templates.tmpl_navtrail_links (as = as, ln = ln, weburl = weburl, separator = separator, dads = dads) def create_searchwithin_selection_box(fieldname='f', value='', ln='en'): "Produces 'search within' selection box for the current collection." out = "" out += """""" return out def get_searchwithin_fields(ln='en'): "Retrieves the fields name used in the 'search within' selection box for the current collection." query = "SELECT code,name FROM field ORDER BY name ASC" res = run_sql(query) fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." _ = gettext_set_language(ln) out = """ """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def nice_number(num, ln=cdslang): "Returns nicely printed number NUM in language LN using thousands separator char defined in the I18N messages file." 
# NOTE(review): this region was mangled by extraction (many logical lines fused
# into one physical line).  The definitions below are reconstructed with
# conventional formatting; the `def nice_number(...)` header originally sat at
# the end of the preceding (unedited) line.
def nice_number(num, ln=cdslang):
    """Return number NUM as a string using the thousands separator defined in
    the I18N message catalog for language LN.  Return None when NUM is None."""
    if num is None:
        return None
    _ = gettext_set_language(ln)
    separator = _(",")
    chars_in = list(str(num))
    num = len(chars_in)
    chars_out = []
    for i in range(0, num):
        # insert the separator before every third digit, counting from the right:
        if i % 3 == 0 and i != 0:
            chars_out.append(separator)
        chars_out.append(chars_in[num-i-1])
    chars_out.reverse()
    return ''.join(chars_out)

def is_selected(var, fld):
    """Check if the two are equal, and if yes, return ' selected'.
    Useful for select boxes.  The last branch accepts an old-style
    three-letter field code FLD ('w' + code) against the bare code VAR."""
    if type(var) is int and type(fld) is int:
        if var == fld:
            return " selected"
    elif str(var) == str(fld):
        return " selected"
    elif fld and len(fld) == 3 and fld[0] == "w" and var == fld[1:]:
        return " selected"
    return ""

def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new):
    """Analyze `urlargs' (URL CGI GET query arguments) and for each occurrence
    of argument matching `regexp_argname' replace every substring `text_old'
    by `text_new'.  Return the resulting URL.
    Useful for create_nearest_terms_box."""
    out = ""
    # parse URL arguments into a dictionary:
    urlargsdict = cgi.parse_qs(urlargs)
    ## construct new URL arguments:
    urlargsdictnew = {}
    for key in urlargsdict.keys():
        if sre.match(regexp_argname, key):
            # replace `arg' by new values:
            urlargsdictnew[key] = []
            for parg in urlargsdict[key]:
                urlargsdictnew[key].append(string.replace(parg, text_old, text_new))
        else:
            # keep old values:
            urlargsdictnew[key] = urlargsdict[key]
    # build new URL for this word:
    for key in urlargsdictnew.keys():
        for val in urlargsdictnew[key]:
            out += "&" + key + "=" + urllib.quote_plus(val, '')
    if out.startswith("&"):
        out = out[1:]
    return out

class HitSet:
    """Class describing set of records, implemented as bit vectors of recIDs.
    Using Numeric arrays for speed (1 value = 8 bits), can use later "real"
    bit vectors to save space."""

    def __init__(self, init_set=None):
        # _nbhits caches the cardinality; -1 means "not yet calculated"
        self._nbhits = -1
        if init_set:
            self._set = init_set
        else:
            self._set = Numeric.zeros(cfg_max_recID+1, Numeric.Int0)

    def __repr__(self, join=string.join):
        return "%s(%s)" % (self.__class__.__name__, join(map(repr, self._set), ', '))

    def add(self, recID):
        "Adds a record to the set."
        self._set[recID] = 1

    def addmany(self, recIDs):
        "Adds several recIDs to the set."
        for recID in recIDs:
            self._set[recID] = 1

    def addlist(self, arr):
        "Adds an array of recIDs to the set."
        Numeric.put(self._set, arr, 1)

    def remove(self, recID):
        "Removes a record from the set."
        self._set[recID] = 0

    def removemany(self, recIDs):
        "Removes several records from the set."
        for recID in recIDs:
            self.remove(recID)

    def intersect(self, other):
        "Does a set intersection with other. Keep result in self."
        self._set = Numeric.bitwise_and(self._set, other._set)

    def union(self, other):
        "Does a set union with other. Keep result in self."
        self._set = Numeric.bitwise_or(self._set, other._set)

    def difference(self, other):
        "Does a set difference with other. Keep result in self."
        #self._set = Numeric.bitwise_not(self._set, other._set)
        for recID in Numeric.nonzero(other._set):
            self.remove(recID)

    def contains(self, recID):
        "Checks whether the set contains recID."
        return self._set[recID]

    __contains__ = contains # Higher performance member-test for python 2.0 and above

    def __getitem__(self, index):
        "Support for the 'for item in set:' protocol."
        return Numeric.nonzero(self._set)[index]

    def calculate_nbhits(self):
        "Calculates the number of records set in the hitset."
        self._nbhits = Numeric.sum(self._set.copy().astype(Numeric.Int))

    def items(self):
        "Return an array containing all recID."
        return Numeric.nonzero(self._set)

    def tolist(self):
        "Return a list containing all recID."
        return Numeric.nonzero(self._set).tolist()

# speed up HitSet operations by ~20% if Psyco is installed:
try:
    import psyco
    psyco.bind(HitSet)
except:
    pass

def escape_string(s):
    "Escapes special chars in string. For MySQL queries."
    s = MySQLdb.escape_string(s)
    return s
# NOTE(review): the fused original continued here with the header and docstring
# of wash_colls(); its body sits on the following (unedited) lines.
# NOTE(review): extraction fused this region; wash_colls() is reconstructed in
# full here (its original header sat at the end of the preceding line).
def wash_colls(cc, c, split_colls=0):
    """Wash collection list by checking whether user has deselected anything
    under 'Narrow search'.  Checks also if cc is a list or not.

    Return list of cc, colls_to_display, colls_to_search since the list of
    collections to display is different from that to search in.  This is
    because users might have chosen 'split by collection' functionality.

    The behaviour of "collections to display" depends solely whether user has
    deselected a particular collection: e.g. if it started from 'Articles and
    Preprints' page, and deselected 'Preprints', then collection to display is
    'Articles'.  If he did not deselect anything, then collection to display
    is 'Articles & Preprints'.

    The behaviour of "collections to search in" depends on the 'split_colls'
    parameter:
         * if is equal to 1, then we can wash the colls list down and search
           solely in the collection the user started from;
         * if is equal to 0, then we are splitting to the first level of
           collections, i.e. collections as they appear on the page we
           started to search from;
    """
    colls_out = []
    colls_out_for_display = []
    # check what type is 'cc':
    if type(cc) is list:
        for ci in cc:
            if collection_reclist_cache.has_key(ci):
                # yes this collection is real, so use it:
                cc = ci
                break
    else:
        # check once if cc is real:
        if not collection_reclist_cache.has_key(cc):
            cc = cdsname # cc is not real, so replace it with Home collection
    # check type of 'c' argument:
    if type(c) is list:
        colls = c
    else:
        colls = [c]
    # remove all 'unreal' collections:
    colls_real = []
    for coll in colls:
        if collection_reclist_cache.has_key(coll):
            colls_real.append(coll)
    colls = colls_real
    # check if some real collections remain:
    if len(colls) == 0:
        colls = [cc]
    # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll':
    query = "SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name='%s' AND cc.type='r' AND c.restricted IS NULL" % MySQLdb.escape_string(cc)
    res = run_sql(query)
    l_cc_nonrestricted_sons = []
    l_c = colls
    for row in res:
        l_cc_nonrestricted_sons.append(row[0])
    l_c.sort()
    l_cc_nonrestricted_sons.sort()
    if l_cc_nonrestricted_sons == l_c:
        colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
    else:
        colls_out_for_display = colls # nope, we need to display all 'colls' successively
    # remove duplicates:
    colls_out_for_display_nondups = filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
    colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1], colls_out_for_display_nondups)
    # second, let us decide on collection splitting:
    if split_colls == 0:
        # type A - no sons are wanted:
        colls_out = colls_out_for_display
    # elif split_colls == 1:
    else:
        # type B - sons (first-level descendants) are wanted:
        for coll in colls_out_for_display:
            coll_sons = get_coll_sons(coll)
            if coll_sons == []:
                colls_out.append(coll)
            else:
                colls_out = colls_out + coll_sons
    # remove duplicates:
    colls_out_nondups = filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1))
    colls_out = map(lambda x, colls_out=colls_out: colls_out[x-1], colls_out_nondups)
    return (cc, colls_out_for_display, colls_out)

def strip_accents(x):
    """Strip accents in the input phrase X (assumed in UTF-8) by replacing
    accented characters with their unaccented cousins (e.g. é by e).
    Return such a stripped X."""
    # convert input into Unicode string:
    try:
        y = unicode(x, "utf-8")
    except:
        return x # something went wrong, probably the input wasn't UTF-8
    # asciify Latin-1 lowercase characters:
    y = sre_unicode_lowercase_a.sub("a", y)
    y = sre_unicode_lowercase_ae.sub("ae", y)
    y = sre_unicode_lowercase_e.sub("e", y)
    y = sre_unicode_lowercase_i.sub("i", y)
    y = sre_unicode_lowercase_o.sub("o", y)
    y = sre_unicode_lowercase_u.sub("u", y)
    y = sre_unicode_lowercase_y.sub("y", y)
    y = sre_unicode_lowercase_c.sub("c", y)
    y = sre_unicode_lowercase_n.sub("n", y)
    # asciify Latin-1 uppercase characters:
    y = sre_unicode_uppercase_a.sub("A", y)
    y = sre_unicode_uppercase_ae.sub("AE", y)
    y = sre_unicode_uppercase_e.sub("E", y)
    y = sre_unicode_uppercase_i.sub("I", y)
    y = sre_unicode_uppercase_o.sub("O", y)
    y = sre_unicode_uppercase_u.sub("U", y)
    y = sre_unicode_uppercase_y.sub("Y", y)
    y = sre_unicode_uppercase_c.sub("C", y)
    y = sre_unicode_uppercase_n.sub("N", y)
    # return UTF-8 representation of the Unicode string:
    return y.encode("utf-8")
# NOTE(review): `cdslang` normally arrives via `from cdsware.config import *`
# at the top of this module; the fallback below is a no-op in the real file
# and only keeps this reconstructed block importable in isolation.
try:
    cdslang
except NameError:
    cdslang = "en"

# NOTE(review): extraction fused this region; wash_pattern()'s header sat at
# the end of the preceding (unedited) line and is reconstructed here.
def wash_pattern(p):
    """Wash pattern passed by URL.

    Check for sanity of the wildcard by removing wildcards if they are
    appended to extremely short words (1-3 letters).  TODO: instead of
    this approximative treatment, it will be much better to introduce a
    temporal limit, e.g. to kill a query if it does not finish in 10
    seconds."""
    # strip accents:
    # p = strip_accents(p) # FIXME: when available, strip accents all the time
    # add leading/trailing whitespace for the two following wildcard-sanity checking regexps:
    p = " " + p + " "
    # get rid of wildcards at the beginning of words:
    p = sre_pattern_wildcards_at_beginning.sub("\\1", p)
    # replace spaces within quotes by __SPACE__ temporarily:
    p = sre_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
    p = sre_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
    p = sre_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
    # get rid of extremely short words (1-3 letters with wildcards):
    p = sre_pattern_short_words.sub("\\1", p)
    # replace back __SPACE__ by spaces:
    p = sre_pattern_space.sub(" ", p)
    # replace special terms: (embedded diff hunk resolved: keep the fixed,
    # portable strftime format instead of the invalid "%04Y-%02m-%02d")
    p = sre_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p)
    # remove unnecessary whitespace:
    p = string.strip(p)
    return p

def wash_field(f):
    """Wash field passed by URL."""
    # get rid of unnecessary whitespace:
    f = string.strip(f)
    # wash old-style CDSware/ALEPH 'f' field argument, e.g. replaces 'wau' and 'au' by 'author':
    if cfg_fields_convert.has_key(string.lower(f)):
        # fix: look up the lowercased key actually tested above (previously
        # indexed with the raw `f`, raising KeyError for mixed-case input):
        f = cfg_fields_convert[string.lower(f)]
    return f

def wash_dates(d1y, d1m, d1d, d2y, d2m, d2d):
    """Take user-submitted dates (day, month, year) of the web form and
    return (day1, day2) in YYYY-MM-DD format suitable for time restricted
    searching.  I.e. pay attention when months are not there to put 01 or
    12 according to if it's the starting or the ending date, etc."""
    day1, day2 = "", ""
    # sanity checking:
    if d1y == 0 and d1m == 0 and d1d == 0 and d2y == 0 and d2m == 0 and d2d == 0:
        return ("", "") # nothing selected, so return empty values
    # construct day1 (from):
    if d1y:
        day1 += "%04d" % d1y
    else:
        day1 += "0000"
    if d1m:
        day1 += "-%02d" % d1m
    else:
        day1 += "-01"
    if d1d:
        day1 += "-%02d" % d1d
    else:
        day1 += "-01"
    # construct day2 (until):
    if d2y:
        day2 += "%04d" % d2y
    else:
        day2 += "9999"
    if d2m:
        day2 += "-%02d" % d2m
    else:
        day2 += "-12"
    if d2d:
        day2 += "-%02d" % d2d
    else:
        day2 += "-31" # NOTE: perhaps we should add max(datenumber) in
                      # given month, but for our quering it's not
                      # needed, 31 will always do
    # okay, return constructed YYYY-MM-DD dates:
    return (day1, day2)

def get_colID(c):
    "Return collection ID for collection name C. Return None if no match found."
    colID = None
    res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
    if res:
        colID = res[0][0]
    return colID

def get_coll_i18nname(c, ln=cdslang):
    """Return nicely formatted collection name (of name type 'ln', 'long
    name') for collection C in language LN."""
    global collection_i18nname_cache
    global collection_i18nname_cache_timestamp
    # firstly, check whether the collectionname table was modified:
    res = run_sql("SHOW TABLE STATUS LIKE 'collectionname'")
    if res and str(res[0][11]) > collection_i18nname_cache_timestamp:
        # yes it was, cache clear-up needed:
        collection_i18nname_cache = create_collection_i18nname_cache()
    # secondly, read i18n name from either the cache or return common name:
    out = c
    try:
        out = collection_i18nname_cache[c][ln]
    except KeyError:
        pass # translation in LN does not exist
    return out

def get_field_i18nname(f, ln=cdslang):
    """Return nicely formatted field name (of type 'ln', 'long name') for
    field F in language LN."""
    global field_i18nname_cache
    global field_i18nname_cache_timestamp
    # firstly, check whether the fieldname table was modified:
    res = run_sql("SHOW TABLE STATUS LIKE 'fieldname'")
    if res and str(res[0][11]) > field_i18nname_cache_timestamp:
        # yes it was, cache clear-up needed:
        field_i18nname_cache = create_field_i18nname_cache()
    # secondly, read i18n name from either the cache or return common name:
    out = f
    try:
        out = field_i18nname_cache[f][ln]
    except KeyError:
        pass # translation in LN does not exist
    return out

def get_coll_ancestors(coll):
    "Returns a list of ancestors for collection 'coll'."
    coll_ancestors = []
    coll_ancestor = coll
    # walk up one dad at a time; assumes the dad relation is acyclic —
    # NOTE(review): a cyclic collection_collection table would loop forever.
    while 1:
        query = "SELECT c.name FROM collection AS c "\
                "LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad "\
                "LEFT JOIN collection AS ccc ON ccc.id=cc.id_son "\
                "WHERE ccc.name='%s' ORDER BY cc.id_dad ASC LIMIT 1" \
                % escape_string(coll_ancestor)
        res = run_sql(query, None, 1)
        if res:
            coll_name = res[0][0]
            coll_ancestors.append(coll_name)
            coll_ancestor = coll_name
        else:
            break
    # ancestors found, return reversed list:
    coll_ancestors.reverse()
    return coll_ancestors

def get_coll_sons(coll, type='r', public_only=1):
    """Return a list of sons (first-level descendants) of type 'type' for
    collection 'coll'.  If public_only, then return only non-restricted
    son collections."""
    coll_sons = []
    query = "SELECT c.name FROM collection AS c "\
            "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\
            "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\
            "WHERE cc.type='%s' AND ccc.name='%s'" \
            % (escape_string(type), escape_string(coll))
    if public_only:
        query += " AND c.restricted IS NULL "
    query += " ORDER BY cc.score DESC"
    res = run_sql(query)
    for name in res:
        coll_sons.append(name[0])
    return coll_sons
# NOTE(review): the fused original continued here with the header of
# get_coll_real_descendants(); its body sits on the following lines.
""" coll_sons = [] query = "SELECT c.name,c.dbquery FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE ccc.name='%s' ORDER BY cc.score DESC" \ % escape_string(coll) res = run_sql(query) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons def get_collection_reclist(coll): """Return hitset of recIDs that belong to the collection 'coll'. But firstly check the last updated date of the collection table. If it's newer than the cache timestamp, then empty the cache, since new records could have been added.""" global collection_reclist_cache global collection_reclist_cache_timestamp # firstly, check whether the collection table was modified: res = run_sql("SHOW TABLE STATUS LIKE 'collection'") if res and str(res[0][11])>collection_reclist_cache_timestamp: # yes it was, cache clear-up needed: collection_reclist_cache = create_collection_reclist_cache() # secondly, read reclist from either the cache or the database: if not collection_reclist_cache[coll]: # not yet it the cache, so calculate it and fill the cache: set = HitSet() query = "SELECT nbrecs,reclist FROM collection WHERE name='%s'" % coll res = run_sql(query, None, 1) if res: try: set._nbhits, set._set = res[0][0], Numeric.loads(zlib.decompress(res[0][1])) except: set._nbhits = 0 collection_reclist_cache[coll] = set # finally, return reclist: return collection_reclist_cache[coll] def coll_restricted_p(coll): "Predicate to test if the collection coll is restricted or not." if not coll: return 0 query = "SELECT restricted FROM collection WHERE name='%s'" % MySQLdb.escape_string(coll) res = run_sql(query, None, 1) if res and res[0][0] != None: return 1 else: return 0 def coll_restricted_group(coll): "Return Apache group to which the collection is restricted. 
Return None if it's public." if not coll: return None query = "SELECT restricted FROM collection WHERE name='%s'" % MySQLdb.escape_string(coll) res = run_sql(query, None, 1) if res: return res[0][0] else: return None def create_collection_reclist_cache(): """Creates list of records belonging to collections. Called on startup and used later for intersecting search results with collection universe.""" global collection_reclist_cache_timestamp collrecs = {} res = run_sql("SELECT name,reclist FROM collection") for name,reclist in res: collrecs[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) # update timestamp try: - collection_reclist_cache_timestamp = time.strftime("%04Y-%02m-%02d %02H:%02M:%02S", time.localtime()) + collection_reclist_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) except NameError: collection_reclist_cache_timestamp = 0 return collrecs try: collection_reclist_cache.has_key(cdsname) except: try: collection_reclist_cache = create_collection_reclist_cache() except: collection_reclist_cache = {} def create_collection_i18nname_cache(): """Create cache of I18N collection names of type 'ln' (=long name). 
# NOTE(review): extraction fused this region; create_collection_i18nname_cache()'s
# header sat at the end of the preceding (unedited) line and is reconstructed here.
def create_collection_i18nname_cache():
    """Create cache of I18N collection names of type 'ln' (=long name).
    Called on startup and used later during the search time."""
    global collection_i18nname_cache_timestamp
    names = {}
    res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name
    for c, ln, i18nname in res:
        if i18nname:
            try:
                names[c]
            except KeyError:
                names[c] = {}
            names[c][ln] = i18nname
    # update timestamp: (embedded diff hunk resolved: keep the fixed,
    # portable strftime format instead of the invalid "%04Y-%02m-%02d ...")
    try:
        collection_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    except NameError:
        collection_i18nname_cache_timestamp = 0
    return names

# initialise the collection i18n-name cache on first import:
try:
    collection_i18nname_cache.has_key(cdsname)
except:
    try:
        collection_i18nname_cache = create_collection_i18nname_cache()
    except:
        collection_i18nname_cache = {}

def create_field_i18nname_cache():
    """Create cache of I18N field names of type 'ln' (=long name).
    Called on startup and used later during the search time."""
    global field_i18nname_cache_timestamp
    names = {}
    res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name
    for f, ln, i18nname in res:
        if i18nname:
            try:
                names[f]
            except KeyError:
                names[f] = {}
            names[f][ln] = i18nname
    # update timestamp: (embedded diff hunk resolved, as above)
    try:
        field_i18nname_cache_timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    except NameError:
        field_i18nname_cache_timestamp = 0
    return names

# initialise the field i18n-name cache on first import:
try:
    field_i18nname_cache.has_key(cdsname)
except:
    try:
        field_i18nname_cache = create_field_i18nname_cache()
    except:
        field_i18nname_cache = {}
# NOTE(review): the fused original continued here with the header of
# browse_pattern(); its body remains on the following (unedited) lines.
if not f: return browse_in_bibwords(req, p, f) ## prepare collection urlargument for later printing: p_orig = p urlarg_colls = "" for coll in colls: urlarg_colls += "&c=%s" % urllib.quote(coll) ## okay, "real browse" follows: browsed_phrases = get_nearest_terms_in_bibxxx(p, f, rg, 1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, rg, 1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: phrase_hitset = HitSet() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union(phrase_hitsets[coll]) phrase_hitset.calculate_nbhits() if phrase_hitset._nbhits > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, phrase_hitset._nbhits]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """

No match close to %s found in given collections. #Please try different term.

Displaying matches in any collection...""" % p_orig) ## try to get nbhits for these phrases in any collection: for phrase in browsed_phrases: browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)]) ## display results now: out = websearch_templates.tmpl_browse_pattern( f = get_field_i18nname(f, ln), ln = ln, browsed_phrases_in_colls = browsed_phrases_in_colls, weburl = weburl, urlarg_colls = urlarg_colls, ) req.write(out) return def browse_in_bibwords(req, p, f, ln=cdslang): """Browse inside words indexes.""" if not p: return _ = gettext_set_language(ln) urlargs = string.replace(req.args, "action=%s" % _("Browse"), "action=%s" % _("Search")) nearest_box = create_nearest_terms_box(urlargs, p, f, 'w', ln=ln, intro_text_p=0) req.write(websearch_templates.tmpl_search_in_bibwords( p = p, f = f, ln = ln, nearest_box = nearest_box )) return def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=cdslang): """Search for complex pattern 'p' within field 'f' according to matching type 'm'. Return hitset of recIDs. The function uses multi-stage searching algorithm in case of no exact match found. See the Search Internals document for detailed description. The 'ap' argument governs whether an alternative patterns are to be used in case there is no direct hit for (p,f,m). For example, whether to replace non-alphanumeric characters by spaces if it would give some hits. See the Search Internals document for detailed description. (ap=0 forbits the alternative pattern usage, ap=1 permits it.) The 'of' argument governs whether to print or not some information to the user in case of no match found. (Usually it prints the information in case of HTML formats, otherwise it's silent). The 'verbose' argument controls the level of debugging information to be printed (0=least, 9=most). All the parameters are assumed to have been previously washed. This function is suitable as a mid-level API. 
""" _ = gettext_set_language(ln) hitset_empty = HitSet() hitset_empty._nbhits = 0 # sanity check: if not p: hitset_full = HitSet(Numeric.ones(cfg_max_recID+1, Numeric.Int0)) hitset_full._nbhits = cfg_max_recID # no pattern, so return all universe return hitset_full # search stage 1: break up arguments into basic search units: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units = create_basic_search_units(req, p, f, m, of) if verbose and of.startswith("h"): t2 = os.times()[4] print_warning(req, "Search stage 1: basic search units are: %s" % basic_search_units) print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1)) # search stage 2: do search for each search unit and verify hit presence: if verbose and of.startswith("h"): t1 = os.times()[4] basic_search_units_hitsets = [] for idx_unit in range(0,len(basic_search_units)): bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit] basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m) if verbose >= 9 and of.startswith("h"): print_warning(req, "Search stage 1: pattern %s gave hitlist %s" % (bsu_p, Numeric.nonzero(basic_search_unit_hitset._set))) if basic_search_unit_hitset._nbhits>0 or \ ap==0 or \ bsu_o=="|" or \ ((idx_unit+1) 0: # we retain the new unit instead if of.startswith('h'): print_warning(req, _("No exact match found for %s, using %s instead...") % (bsu_p,bsu_pn)) basic_search_units[idx_unit][1] = bsu_pn basic_search_units_hitsets.append(basic_search_unit_hitset) else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h'): if req: if bsu_f == "recid": print_warning(req, "Requested record does not seem to exist.") else: print_warning(req, create_nearest_terms_box(req.args, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty else: # stage 2-3: no hits found either, propose nearest indexed terms: if of.startswith('h'): if req: if bsu_f == "recid": print_warning(req, "Requested record does not seem to exist.") else: print_warning(req, 
create_nearest_terms_box(req.args, bsu_p, bsu_f, bsu_m, ln=ln)) return hitset_empty if verbose and of.startswith("h"): t2 = os.times()[4] for idx_unit in range(0,len(basic_search_units)): print_warning(req, "Search stage 2: basic search unit %s gave %d hits." % (basic_search_units[idx_unit][1:], basic_search_units_hitsets[idx_unit]._nbhits)) print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1)) # search stage 3: apply boolean query for each search unit: if verbose and of.startswith("h"): t1 = os.times()[4] # let the initial set be the complete universe: hitset_in_any_collection = HitSet(Numeric.ones(cfg_max_recID+1, Numeric.Int0)) for idx_unit in range(0,len(basic_search_units)): this_unit_operation = basic_search_units[idx_unit][0] this_unit_hitset = basic_search_units_hitsets[idx_unit] if this_unit_operation == '+': hitset_in_any_collection.intersect(this_unit_hitset) elif this_unit_operation == '-': hitset_in_any_collection.difference(this_unit_hitset) elif this_unit_operation == '|': hitset_in_any_collection.union(this_unit_hitset) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." 
def search_unit(p, f=None, m=None):
    """Search for basic search unit defined by pattern 'p' and field
       'f' and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are being detected inside 'p'.

       This function is suitable as a low-level API.
    """
    ## create empty output results set:
    ## (renamed from 'set', which shadowed the builtin)
    hitset = HitSet()
    if not p: # sanity checking
        return hitset
    if m == 'a' or m == 'r':
        # we are doing either direct bibxxx search or phrase search or regexp search
        hitset = search_unit_in_bibxxx(p, f, m)
    else:
        # we are doing bibwords search by default
        hitset = search_unit_in_bibwords(p, f)
    hitset.calculate_nbhits()
    return hitset

def search_unit_in_bibwords(word, f, decompress=zlib.decompress):
    """Searches for 'word' inside bibwordsX table for field 'f' and
       returns hitset of recIDs."""
    hitset = HitSet() # will hold output result set (renamed from 'set': shadowed builtin)
    set_used = 0 # not-yet-used flag, to be able to circumvent set operations
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id("anyfield")
    if f:
        index_id = get_index_id(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return HitSet() # word index f does not exist
    # wash 'word' argument and construct query:
    word = string.replace(word, '*', '%') # we now use '*' as the truncation character
    words = string.split(word, "->", 1) # check for span query
    if len(words) == 2:
        word0 = sre_word.sub('', words[0])
        word1 = sre_word.sub('', words[1])
        # NOTE(review): terms are truncated to 50 chars -- presumably the
        # column width of bibwordsX.term; confirm against the schema.
        query = "SELECT term,hitlist FROM %s WHERE term BETWEEN '%s' AND '%s'" \
                % (bibwordsX, escape_string(word0[:50]), escape_string(word1[:50]))
    else:
        word = sre_word.sub('', word)
        if string.find(word, '%') >= 0: # do we have wildcard in the word?
            query = "SELECT term,hitlist FROM %s WHERE term LIKE '%s'" \
                    % (bibwordsX, escape_string(word[:50]))
        else:
            query = "SELECT term,hitlist FROM %s WHERE term='%s'" \
                    % (bibwordsX, escape_string(word[:50]))
    # launch the query:
    res = run_sql(query)
    # fill the result set:
    for word, hitlist in res:
        hitset_bibwrd = HitSet(Numeric.loads(decompress(hitlist)))
        # add the results: union only once a first set has been adopted,
        # so the first hitlist is taken over directly (cheaper):
        if set_used:
            hitset.union(hitset_bibwrd)
        else:
            hitset = hitset_bibwrd
            set_used = 1
    # okay, return result set:
    return hitset
def search_unit_in_bibxxx(p, f, type):
    """Searches for pattern 'p' inside bibxxx tables for field 'f' and
       returns hitset of recIDs found.  The search type is defined by
       'type' (e.g. equals to 'r' for a regexp search).

       Note: the parameter 'type' shadows the builtin of the same name
       (kept as-is here; callers pass it positionally)."""
    p_orig = p # saving for eventual future 'no match' reporting
    # wash arguments:
    f = string.replace(f, '*', '%') # replace truncation char '*' in field definition
    if type == 'r':
        # regexp search: pattern goes into the SQL verbatim (escaped):
        pattern = "REGEXP '%s'" % MySQLdb.escape_string(p)
    else:
        p = string.replace(p, '*', '%') # we now use '*' as the truncation character
        ps = string.split(p, "->", 1) # check for span query:
        if len(ps) == 2:
            pattern = "BETWEEN '%s' AND '%s'" % (MySQLdb.escape_string(ps[0]), MySQLdb.escape_string(ps[1]))
        else:
            if string.find(p, '%') > -1:
                # wildcard present -> LIKE query:
                pattern = "LIKE '%s'" % MySQLdb.escape_string(ps[0])
            else:
                # exact phrase match:
                pattern = "='%s'" % MySQLdb.escape_string(ps[0])
    # construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # convert old ALEPH tag names, if appropriate: (TODO: get rid of this before entering this function)
        if cfg_fields_convert.has_key(string.lower(f)):
            f = cfg_fields_convert[string.lower(f)]
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
        if not tl:
            # f index does not exist, nevermind
            pass
    # okay, start search:
    l = [] # will hold list of recID that matched
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # construct query:
        if t == "001":
            # record ID itself lives in bibrec, not in bibXXx:
            query = "SELECT id FROM bibrec WHERE id %s" % pattern
        else:
            if len(t) != 6 or t[-1:]=='%':
                # only the beginning of field 't' is defined, so add wildcard character:
                query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s AND bx.tag LIKE '%s%%'" %\
                        (bx, bibx, pattern, t)
            else:
                query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s AND bx.tag='%s'" %\
                        (bx, bibx, pattern, t)
        # launch the query:
        res = run_sql(query)
        # fill the result set:
        for id_bibrec in res:
            if id_bibrec[0]:
                l.append(id_bibrec[0])
    # check no of hits found:
    nb_hits = len(l)
    # okay, return result set:
    set = HitSet()
    set.addlist(Numeric.array(l))
    return set
def search_unit_in_bibrec(day1, day2, type='creation_date'):
    """Return hitset of recIDs found that were either created or modified
       (see 'type' arg) from day1 until day2, inclusive.  Does not pay
       attention to pattern, collection, anything.  Useful to intersect
       later on with the 'real' query."""
    set = HitSet()
    if type != "creation_date" and type != "modification_date":
        # type argument is invalid, so search for creation dates by default
        type = "creation_date"
    # column name is interpolated (validated above); day bounds are bound params:
    res = run_sql("SELECT id FROM bibrec WHERE %s>=%s AND %s<=%s" % (type, "%s", type, "%s"),
                  (day1, day2))
    l = []
    for row in res:
        l.append(row[0])
    set.addlist(Numeric.array(l))
    return set

def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=cdslang):
    """Return dict of hitsets given by intersection of hitset with the collection universes."""
    _ = gettext_set_language(ln)
    # search stage 4: intersect with the collection universe:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    results = {}
    results_nbhits = 0
    for coll in colls:
        results[coll] = HitSet()
        # intersect directly on the underlying bit arrays for speed:
        results[coll]._set = Numeric.bitwise_and(hitset_in_any_collection._set,
                                                 get_collection_reclist(coll)._set)
        results[coll].calculate_nbhits()
        results_nbhits += results[coll]._nbhits
    if results_nbhits == 0:
        # no hits found, try to search in Home:
        results_in_Home = HitSet()
        results_in_Home._set = Numeric.bitwise_and(hitset_in_any_collection._set,
                                                   get_collection_reclist(cdsname)._set)
        results_in_Home.calculate_nbhits()
        if results_in_Home._nbhits > 0:
            # some hits found in Home, so propose this search:
            # strip cc= and c= arguments from the query URL:
            url_args = req.args
            url_args = sre.sub(r'(^|\&)cc=.*?(\&|$)', r'\2', url_args)
            url_args = sre.sub(r'(^|\&)c=.*?(\&[^c]+=|$)', r'\2', url_args)
            url_args = sre.sub(r'^\&+', '', url_args)
            url_args = sre.sub(r'\&+$', '', url_args)
            if of.startswith("h"):
                # NOTE(review): this format string has 2 placeholders (%s, %d)
                # but 4 arguments (colls, weburl, url_args, nbhits) -- would
                # raise TypeError if executed.  Likely the message originally
                # contained a hyperlink built from weburl/url_args that was
                # lost; confirm against upstream before fixing.
                print_warning(req, _("No match found in collection %s. Other public collections gave "
                                     "%d hits.") % (string.join(colls, ","), weburl, url_args,
                                                    results_in_Home._nbhits))
            results = {}
        else:
            # no hits found in Home, recommend different search terms:
            if of.startswith("h"):
                print_warning(req, _("No public collection matched your query. "
                                     "If you were looking for a non-public document, please choose "
                                     "the desired restricted collection first."))
            results = {}
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits)
        print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1))
    return results

def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
    """Return intersection of search 'results' (a dict of hitsets
       with collection as key) with the 'hitset', i.e. apply
       'hitset' intersection to each collection within search
       'results'.

       If the final 'results' set is to be empty, and 'ap'
       (approximate pattern) is true, and then print the `warningtext'
       and return the original 'results' set unchanged.  If 'ap' is
       false, then return empty results set.
    """
    if ap:
        # keep a deep copy so the originals can be restored on zero hits:
        results_ap = copy.deepcopy(results)
    else:
        results_ap = {} # will return empty dict in case of no hits found
    nb_total = 0
    for coll in results.keys():
        # in-place intersection of each collection's hitset:
        results[coll].intersect(hitset)
        results[coll].calculate_nbhits()
        nb_total += results[coll]._nbhits
    if nb_total == 0:
        if of.startswith("h"):
            print_warning(req, aptext)
        results = results_ap
    return results
def create_similarly_named_authors_link_box(author_name, ln=cdslang):
    """Return a box similar to ``Not satisfied...'' one by proposing
       author searches for similar names.  Namely, take AUTHOR_NAME
       and the first initial of the firstame (after comma) and look
       into author index whether authors with e.g. middle names exist.

       Useful mainly for CERN Library that sometimes contains name
       forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the
       same person.  The box isn't proposed if no similarly named
       authors are found to exist.
    """
    # return nothing if not configured:
    if cfg_create_similarly_named_authors_link_box == 0:
        return ""
    # return empty box if there is no initial:
    if sre.match(r'[^ ,]+, [^ ]', author_name) is None:
        return ""
    # firstly find name comma initial:
    author_name_to_search = sre.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name)
    # secondly search for similar name forms:
    similar_author_names = {}
    # search both the name as given and its accent-stripped form:
    for name in author_name_to_search, strip_accents(author_name_to_search):
        for tag in get_field_tags("author"):
            # deduce into which bibxxx table we will search:
            digit1, digit2 = int(tag[0]), int(tag[1])
            bx = "bib%d%dx" % (digit1, digit2)
            bibx = "bibrec_bib%d%dx" % (digit1, digit2) # (unused in the queries below)
            if len(tag) != 6 or tag[-1:]=='%':
                # only the beginning of field 't' is defined, so add wildcard character:
                query = "SELECT bx.value FROM %s AS bx WHERE bx.value LIKE '%s%%' AND bx.tag LIKE '%s%%'" \
                        % (bx, escape_string(name), tag)
            else:
                query = "SELECT bx.value FROM %s AS bx WHERE bx.value LIKE '%s%%' AND bx.tag='%s'" \
                        % (bx, escape_string(name), tag)
            res = run_sql(query)
            for row in res:
                similar_author_names[row[0]] = 1 # dict used as a unique set
    # remove the original name and sort the list:
    try:
        del similar_author_names[author_name]
    except KeyError:
        pass
    # thirdly print the box:
    out = ""
    if similar_author_names:
        out_authors = similar_author_names.keys()
        out_authors.sort()
        tmp_authors = []
        for out_author in out_authors:
            nbhits = get_nbhits_in_bibxxx(out_author, "author")
            if nbhits:
                tmp_authors.append({'nb': nbhits, 'name': out_author})
        out += websearch_templates.tmpl_similar_author_names(
                 ln = ln,
                 weburl = weburl,
                 authors = tmp_authors,
               )
    return out

def create_nearest_terms_box(urlargs, p, f, t='w', n=5, ln=cdslang, intro_text_p=1):
    """Return text box containing list of 'n' nearest terms above/below 'p'
       for the field 'f' for matching type 't' (words/phrases) in
       language 'ln'.
       Propose new searches according to `urlargs' with the new words.
       If `intro_text_p' is true, then display the introductory message,
       otherwise print only the nearest terms in the box content.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    out = ""
    nearest_terms = []
    if not p: # sanity check
        p = "."
    # look for nearest terms:
    if t == 'w':
        nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
        if not nearest_terms:
            return "%s %s." % (_("No words index available for"), get_field_i18nname(f, ln))
    else:
        nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
        if not nearest_terms:
            return "%s %s." % (_("No phrase index available for"), get_field_i18nname(f, ln))
    # compute the hit count and replacement URL for each proposed term:
    termargs = []
    termhits = []
    for term in nearest_terms:
        if t == 'w':
            termhits.append(get_nbhits_in_bibwords(term, f))
        else:
            termhits.append(get_nbhits_in_bibxxx(term, f))
        termargs.append(urlargs_replace_text_in_arg(urlargs, r'^p\d?$', p, term))
    intro = ""
    if intro_text_p: # add full leading introductory text
        # strip surrounding truncation wildcards from 'p' for display:
        intro = _("Search term %s") % (p.startswith("%") and p.endswith("%") and p[1:-1] or p)
        if f:
            intro += " " + _("inside %s index") % get_field_i18nname(f, ln)
        intro += " " + _("did not match any record. Nearest terms in any collection are:")
    return websearch_templates.tmpl_nearest_term_box(
             p = p,
             ln = ln,
             f = f,
             weburl = weburl,
             terms = nearest_terms,
             termargs = termargs,
             termhits = termhits,
             intro = intro,
           )

def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
    """Return list of +n -n nearest terms to word `p' in index for field `f'."""
    nearest_words = [] # will hold the (sorted) list of nearest words to return
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id("anyfield")
    if f:
        index_id = get_index_id(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return nearest_words # word index for f does not exist: empty result
    # firstly try to get `n' closest words above `p':
    query = "SELECT term FROM %s WHERE term<'%s' ORDER BY term DESC LIMIT %d" \
            % (bibwordsX, escape_string(p), n_above)
    res = run_sql(query)
    for row in res:
        nearest_words.append(row[0])
    nearest_words.reverse() # DESC order -> reverse to get ascending
    # secondly insert given word `p':
    nearest_words.append(p)
    # finally try to get `n' closest words below `p':
    query = "SELECT term FROM %s WHERE term>'%s' ORDER BY term ASC LIMIT %d" \
            % (bibwordsX, escape_string(p), n_below)
    res = run_sql(query)
    for row in res:
        nearest_words.append(row[0])
    return nearest_words
def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases for the
       given pattern p in the given field f, regardless of collection.
       Return list of [phrase1, phrase2, ... , phrase_n]."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = split(p, ":", 1)
    ## We are going to take max(n_below, n_above) as the number of
    ## values to ferch from bibXXx.  This is needed to work around
    ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
    ## use MySQL 4.1.x or our own idxPHRASE in the future.
    n_fetch = 2*max(n_below, n_above)
    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    ## start browsing to fetch list of hits:
    browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
    # always add self to the results set:
    browsed_phrases[p] = 1
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2) # (unused in the queries below)
        # firstly try to get `n' closest phrases above `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            query = "SELECT bx.value FROM %s AS bx WHERE bx.value<'%s' AND bx.tag LIKE '%s%%' ORDER BY bx.value DESC LIMIT %d" \
                    % (bx, escape_string(p), t, n_fetch)
        else:
            query = "SELECT bx.value FROM %s AS bx WHERE bx.value<'%s' AND bx.tag='%s' ORDER BY bx.value DESC LIMIT %d" \
                    % (bx, escape_string(p), t, n_fetch)
        res = run_sql(query)
        for row in res:
            browsed_phrases[row[0]] = 1
        # secondly try to get `n' closest phrases equal to or below `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            query = "SELECT bx.value FROM %s AS bx WHERE bx.value>='%s' AND bx.tag LIKE '%s%%' ORDER BY bx.value ASC LIMIT %d" \
                    % (bx, escape_string(p), t, n_fetch)
        else:
            query = "SELECT bx.value FROM %s AS bx WHERE bx.value>='%s' AND bx.tag='%s' ORDER BY bx.value ASC LIMIT %d" \
                    % (bx, escape_string(p), t, n_fetch)
        res = run_sql(query)
        for row in res:
            browsed_phrases[row[0]] = 1
    # select first n words only: (this is needed as we were searching
    # in many different tables and so aren't sure we have more than n
    # words right; this of course won't be needed when we shall have
    # one ACC table only for given field):
    phrases_out = browsed_phrases.keys()
    phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)),
                                      string.lower(strip_accents(y))))
    # find position of self:
    try:
        idx_p = phrases_out.index(p)
    except:
        # NOTE(review): bare except -- presumably guarding ValueError when
        # 'p' is absent after sorting/deduplication; consider narrowing.
        idx_p = len(phrases_out)/2
    # return n_above and n_below:
    return phrases_out[max(0, idx_p-n_above):idx_p+n_below]

def get_nbhits_in_bibwords(word, f):
    """Return number of hits for word 'word' inside words index for field 'f'."""
    out = 0
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id("anyfield")
    if f:
        index_id = get_index_id(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return 0 # words index for f does not exist
    if word:
        query = "SELECT hitlist FROM %s WHERE term='%s'" % (bibwordsX, escape_string(word))
        res = run_sql(query)
        for hitlist in res:
            # count set bits in the decompressed hit bitmap:
            out += Numeric.sum(Numeric.loads(zlib.decompress(hitlist[0])).copy().astype(Numeric.Int))
    return out
def get_nbhits_in_bibxxx(p, f):
    """Return number of hits for phrase 'p' inside the bibxxx tables for
       field 'f'.  If 'f' is empty and 'p' contains ':', the field is
       split off the front of 'p'."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = split(p, ":", 1)
    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    # start searching:
    recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ...} (unique recIDs, therefore)
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            query = """SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                       WHERE bx.value='%s' AND bx.tag LIKE '%s%%'
                       AND bibx.id_bibxxx=bx.id""" % (bibx, bx, escape_string(p), t)
        else:
            query = """SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                       WHERE bx.value='%s' AND bx.tag='%s'
                       AND bibx.id_bibxxx=bx.id""" % (bibx, bx, escape_string(p), t)
        res = run_sql(query)
        for row in res:
            recIDs[row[0]] = 1
    return len(recIDs)

def get_mysql_recid_from_aleph_sysno(sysno):
    """Returns MySQL's recID for ALEPH sysno passed in the argument (e.g. "002379334CER").
       Returns None in case of failure."""
    out = None
    # parameterized query (was string interpolation of sysno):
    res = run_sql("SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b "
                  "WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id",
                  (sysno,), 1)
    if res:
        out = res[0][0]
    return out

def guess_primary_collection_of_a_record(recID):
    """Return primary collection name a record recid belongs to, by
       testing 980 identifier.  May lead to bad guesses when a
       collection is defined dynamically bia dbquery.  In that case,
       return 'cdsname'."""
    out = cdsname
    dbcollids = get_fieldvalues(recID, "980__a")
    if dbcollids:
        dbquery = "collection:" + dbcollids[0]
        res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,))
        if res:
            out = res[0][0]
    return out

def get_tag_name(tag_value, prolog="", epilog=""):
    """Return tag name from the known tag value, by looking up the 'tag' table.
       Return empty string in case of failure.
       Example: input='100__%', output=first author'."""
    out = ""
    res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,))
    if res:
        out = prolog + res[0][0] + epilog
    return out

def get_fieldcodes():
    """Returns a list of field codes that may have been passed as 'search options' in URL.
       Example: output=['subject','division']."""
    out = []
    res = run_sql("SELECT DISTINCT(code) FROM field")
    for row in res:
        out.append(row[0])
    return out

def get_field_tags(field):
    """Returns a list of MARC tags for the field code 'field'.
       Returns empty list in case of error.
       Example: field='author', output=['100__%','700__%']."""
    out = []
    # parameterized query (was '%s' string interpolation of 'field'),
    # consistent with the run_sql(..., params) style used elsewhere:
    res = run_sql("""SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
                     WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
                     ORDER BY ft.score DESC""", (field,))
    for val in res:
        out.append(val[0])
    return out

def get_fieldvalues(recID, tag):
    """Return list of field values for field TAG inside record RECID."""
    out = []
    if tag == "001___":
        # we have asked for recID that is not stored in bibXXx tables
        out.append(str(recID))
    else:
        # we are going to look inside bibXXx tables
        digit = tag[0:2]
        bx = "bib%sx" % digit
        bibx = "bibrec_bib%sx" % digit
        # FIX: the original concatenated "...LIKE '%s'" + "ORDER BY..."
        # without a separating space, producing invalid SQL
        # (LIKE '...'ORDER BY).  Values are now bound parameters too;
        # only the table names remain interpolated (they cannot be bound).
        query = ("SELECT bx.value FROM %s AS bx, %s AS bibx "
                 "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s "
                 "ORDER BY bibx.field_number, bx.tag ASC") % (bx, bibx)
        res = run_sql(query, (recID, tag))
        for row in res:
            out.append(row[0])
    return out
def get_fieldvalues_alephseq_like(recID, tags):
    """Return textual lines in ALEPH sequential like format for fields 'tags'
       (comma-separated list of tags or tag prefixes) inside record 'recID'."""
    out = ""
    # clean passed 'tag':
    tags_in = string.split(tags, ",")
    if len(tags_in) == 1 and len(tags_in[0]) == 6:
        ## case A: one concrete subfield asked, so print its value if found
        ##         (use with care: can false you if field has multiple occurrences)
        out += string.join(get_fieldvalues(recID, tags_in[0]), "\n")
    else:
        ## case B: print our "text MARC" format; works safely all the time
        # expand each passed tag (or prefix) into 5-char tag patterns:
        tags_out = []
        for tag in tags_in:
            if len(tag) == 0:
                # empty tag means "all tags":
                for i in range(0, 10):
                    for j in range(0, 10):
                        tags_out.append("%d%d%%" % (i, j))
            elif len(tag) == 1:
                for j in range(0, 10):
                    tags_out.append("%s%d%%" % (tag, j))
            elif len(tag) < 5:
                tags_out.append("%s%%" % tag)
            else:
                # FIX: was 'elif tag >= 6:', a str-vs-int comparison that is
                # accidentally always true under Python 2 (and a TypeError
                # under Python 3).  'else:' preserves the effective behaviour:
                # any tag of 5+ chars is cut down to the 5-char tag proper.
                tags_out.append(tag[0:5])
        # search all bibXXx tables as needed:
        for tag in tags_out:
            digits = tag[0:2]
            if tag.startswith("001") or tag.startswith("00%"):
                # record ID pseudo-field comes from bibrec itself:
                if out:
                    out += "\n"
                out += "%09d %s %d" % (recID, "001__", recID)
            bx = "bib%sx" % digits
            bibx = "bibrec_bib%sx" % digits
            query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                    "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
                    "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, recID, tag)
            res = run_sql(query)
            # go through fields:
            field_number_old = -999
            field_old = ""
            for row in res:
                field, value, field_number = row[0], row[1], row[2]
                ind1, ind2 = field[3], field[4]
                if ind1 == "_":
                    ind1 = ""
                if ind2 == "_":
                    ind2 = ""
                # print field tag whenever a new field instance starts:
                if field_number != field_number_old or field[:-1] != field_old[:-1]:
                    if out:
                        out += "\n"
                    out += "%09d %s " % (recID, field[:5])
                    field_number_old = field_number
                    field_old = field
                # print subfield value:
                out += "$$%s%s" % (field[-1:], value)
    return out

def record_exists(recID):
    """Return 1 if record RECID exists.  Return 0 if it doesn't exist.
       Return -1 if it exists but is marked as deleted."""
    out = 0
    # parameterized query (was string interpolation of recID):
    res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
    if res:
        # record exists; now check whether it isn't marked as deleted:
        dbcollids = get_fieldvalues(recID, "980__%")
        if ("DELETED" in dbcollids) or (cfg_cern_site and "DUMMY" in dbcollids):
            out = -1 # exists, but marked as deleted
        else:
            out = 1 # exists fine
    return out

def record_public_p(recID):
    """Return 1 if the record is public, i.e. if it can be found in the Home collection.
       Return 0 otherwise."""
    return get_collection_reclist(cdsname).contains(recID)

def get_creation_date(recID, fmt="%Y-%m-%d"):
    "Returns the creation date of the record 'recID'."
    out = ""
    res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
    if res:
        out = res[0][0]
    return out

def get_modification_date(recID, fmt="%Y-%m-%d"):
    "Returns the date of last modification for the record 'recID'."
    out = ""
    res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
    if res:
        out = res[0][0]
    return out
', epilogue='
def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=cdsname, nb_found=-1, jrec=1, rg=10,
                      as=0, ln=cdslang, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                      sc=1, pl_in_url="",
                      d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0,
                      cpu_time=-1, middle_only=0):
    """Prints stripe with the information on 'collection' and 'nb_found'
       results and CPU time.  Also, prints navigation links
       (beg/next/prev/end) inside the results set.
       If middle_only is set to 1, it will only print the middle box
       information (beg/netx/prev/end/etc) links.
       This is suitable for displaying navigation links at the bottom
       of the search results page.

       NOTE: the 'as' parameter (advanced-search flag) is a Python 2-only
       identifier; it is a reserved word from Python 2.6 on."""
    out = "" # (unused; rendering delegated to the template below)
    # sanity check: clamp 'jrec' (first record to display) into range
    if jrec < 1:
        jrec = 1
    if jrec > nb_found:
        jrec = max(nb_found-rg+1, 1)
    # delegate all rendering to the websearch template:
    return websearch_templates.tmpl_print_search_info(
             ln = ln,
             weburl = weburl,
             collection = collection,
             as = as,
             collection_name = get_coll_i18nname(collection, ln),
             middle_only = middle_only,
             rg = rg,
             nb_found = nb_found,
             sf = sf,
             so = so,
             rm = rm,
             of = of,
             ot = ot,
             p = p,
             f = f,
             p1 = p1,
             p2 = p2,
             p3 = p3,
             f1 = f1,
             f2 = f2,
             f3 = f3,
             m1 = m1,
             m2 = m2,
             m3 = m3,
             op1 = op1,
             op2 = op2,
             pl_in_url = pl_in_url,
             d1y = d1y,
             d1m = d1m,
             d1d = d1d,
             d2y = d2y,
             d2m = d2m,
             d2d = d2d,
             jrec = jrec,
             sc = sc,
             sp = sp,
             all_fieldcodes = get_fieldcodes(),
             cpu_time = cpu_time,
           )
def print_results_overview(req, colls, results_final_nb_total, results_final_nb, cpu_time, ln=cdslang):
    "Prints results overview box with links to particular collections below."
    out = "" # (unused; rendering delegated to the template below)
    new_colls = []
    for coll in colls:
        new_colls.append({
            'code' : coll,
            'name' : get_coll_i18nname(coll, ln),
        })
    # deduce url without 'of' argument:
    args = req.args or ''
    url_args = sre.sub(r'(^|\&)of=.*?(\&|$)', r'\1', args)
    url_args = sre.sub(r'^\&+', '', url_args)
    url_args = sre.sub(r'\&+$', '', url_args)
    return websearch_templates.tmpl_print_results_overview(
             ln = ln,
             weburl = weburl,
             results_final_nb_total = results_final_nb_total,
             results_final_nb = results_final_nb,
             cpu_time = cpu_time,
             colls = new_colls,
             url_args = url_args,
           )

def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=cdslang):
    """Sort records in 'recIDs' list according sort field 'sort_field'
       in order 'sort_order'.  If more than one instance of
       'sort_field' is found for a given record, try to choose that
       that is given by 'sort pattern', for example "sort by report
       number that starts by CERN-PS".  Note that 'sort_field' can be
       field code like 'author' or MARC tag like '100__a' directly.

       NOTE: the returned list keeps the 'reverse order' convention
       expected by print_records() (which prints from tail to head),
       hence the final reverse() for ascending order below."""
    _ = gettext_set_language(ln)
    ## check arguments:
    if not sort_field:
        return recIDs
    if len(recIDs) > cfg_nb_records_to_sort:
        # refuse to sort overly large result sets:
        if of.startswith('h'):
            print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order (\"latest first\").") % cfg_nb_records_to_sort, "Warning")
        return recIDs
    sort_fields = string.split(sort_field, ",")
    recIDs_dict = {}
    recIDs_out = []
    ## first deduce sorting MARC tag out of the 'sort_field' argument:
    tags = []
    for sort_field in sort_fields:
        if sort_field and str(sort_field[0:2]).isdigit():
            # sort_field starts by two digits, so this is probably a MARC tag already
            tags.append(sort_field)
        else:
            # let us check the 'field' table
            query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f
                       WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag
                       ORDER BY ft.score DESC""" % sort_field
            res = run_sql(query)
            if res:
                for row in res:
                    tags.append(row[0])
            else:
                if of.startswith('h'):
                    print_warning(req, _("Sorry, '%s' does not seem to be a valid sort option. Choosing title sort instead.") % sort_field, "Error")
                tags.append("245__a") # fall back to title sort
    if verbose >= 3:
        print_warning(req, "Sorting by tags %s." % tags)
        if sort_pattern:
            print_warning(req, "Sorting preferentially by %s." % sort_pattern)
    ## check if we have sorting tag defined:
    if tags:
        # fetch the necessary field values:
        for recID in recIDs:
            val = "" # will hold value for recID according to which sort
            vals = [] # will hold all values found in sorting tag for recID
            for tag in tags:
                vals.extend(get_fieldvalues(recID, tag))
            if sort_pattern:
                # try to pick that tag value that corresponds to sort pattern
                bingo = 0
                for v in vals:
                    if v.startswith(sort_pattern): # bingo!
                        bingo = 1
                        val = v
                        break
                if not bingo:
                    # sort_pattern not present, so add other vals after spaces
                    val = sort_pattern + " " + string.join(vals)
            else:
                # no sort pattern defined, so join them all together
                val = string.join(vals)
            val = val.lower()
            # group recIDs by their (lowercased) sort key:
            if recIDs_dict.has_key(val):
                recIDs_dict[val].append(recID)
            else:
                recIDs_dict[val] = [recID]
        # sort them:
        recIDs_dict_keys = recIDs_dict.keys()
        recIDs_dict_keys.sort()
        # now that keys are sorted, create output array:
        for k in recIDs_dict_keys:
            for s in recIDs_dict[k]:
                recIDs_out.append(s)
        # ascending or descending?  (reversed because print_records
        # consumes the list from tail to head -- see its docstring)
        if sort_order == 'a':
            recIDs_out.reverse()
        # okay, we are done
        return recIDs_out
    else:
        # good, no sort needed
        return recIDs
""") req.write("""
%s
""" % title) req.write("""
""") for (recID, score) in recID_score_list_to_be_printed: req.write("""""" % \ (score,print_record(recID, format="hs", ln=ln))) req.write("""
(%s)  %s
""") return def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=cdslang, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress): """Prints list of records 'recIDs' formatted accoding to 'format' in groups of 'rg' starting from 'jrec'. Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head. A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. """ # load the right message language _ = gettext_set_language(ln) # sanity checking: if req == None: return if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # we are doing XML output: for irec in range(irec_max,irec_min,-1): req.write(print_record(recIDs[irec], format, ot, ln)) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max,irec_min,-1): x = print_record(recIDs[irec], format, ot, ln) req.write(x) if x: req.write('\n') else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max,irec_min,-1): req.write(print_record(recIDs[irec], format, ot, ln)) elif format.startswith("hb"): # HTML brief format: rows = [] for irec in range(irec_max,irec_min,-1): temp = { 'number' : jrec+irec_max-irec, 'recid' : recIDs[irec], } if relevances and relevances[irec]: temp['relevance'] = relevances[irec] else: 
temp['relevance'] = '' temp['record'] = print_record(recIDs[irec], format, ot, ln) rows.append(temp) req.write(websearch_templates.tmpl_records_format_htmlbrief( ln = ln, weburl = weburl, rows = rows, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, )) else: # HTML detailed format: # deduce url without 'of' argument: url_args = sre.sub(r'(^|\&)of=.*?(\&|$)',r'\1',req.args) url_args = sre.sub(r'^\&+', '', url_args) url_args = sre.sub(r'\&+$', '', url_args) # print other formatting choices: rows = [] for irec in range(irec_max,irec_min,-1): temp = { 'record' : print_record(recIDs[irec], format, ot, ln), 'recid' : recIDs[irec], 'creationdate': '', 'modifydate' : '', } if record_exists(recIDs[irec])==1: temp['creationdate'] = get_creation_date(recIDs[irec]) temp['modifydate'] = get_modification_date(recIDs[irec]) if cfg_experimental_features: r = calculate_cited_by_list(recIDs[irec]) if r: temp ['citinglist'] = r temp ['citationhistory'] = create_citation_history_graph_and_box(recIDs[irec], ln) r = calculate_co_cited_with_list(recIDs[irec]) if r: temp ['cociting'] = r r = calculate_reading_similarity_list(recIDs[irec], "downloads") if r: temp ['downloadsimilarity'] = r temp ['downloadhistory'] = create_download_history_graph_and_box(recIDs[irec], ln) # Get comments and reviews for this record if exist # FIXME: templatize me if cfg_webcomment_allow_comments or cfg_webcomment_allow_reviews: from webcomment import get_first_comments_or_remarks (comments, reviews) = get_first_comments_or_remarks(recID=recIDs[irec], ln=ln, nb_comments=cfg_webcomment_nb_comments_in_detailed_view, nb_reviews=cfg_webcomment_nb_reviews_in_detailed_view) temp['comments'] = comments temp['reviews'] = reviews r = calculate_reading_similarity_list(recIDs[irec], "pageviews") if r: temp ['viewsimilarity'] = r rows.append(temp) req.write(websearch_templates.tmpl_records_format_other( ln = ln, weburl = weburl, url_args = url_args, rows = rows, format = format, )) 
else: print_warning(req, _("Use different search terms.")) def print_record(recID, format='hb', ot='', ln=cdslang, decompress=zlib.decompress): "Prints record 'recID' formatted accoding to 'format'." _ = gettext_set_language(ln) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += "

\n" for id in get_fieldvalues(recID,oaiidfield): out += " %s\n" % id out += " %s\n" % get_modification_date(recID) out += "
\n" out += " \n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res and record_exist_p==1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ \n""" out += " %d\n" % int(recID) elif format.startswith("xm"): out += """ \n""" out += " %d\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, cfg_oaiidtag) if oai_ids: out += "%s\n" % \ (cfg_oaiidtag[0:3], cfg_oaiidtag[3:4], cfg_oaiidtag[4:5], cfg_oaiidtag[5:6], oai_ids[0]) out += "DELETED\n" else: for digit1 in range(0,10): for digit2 in range(0,10): bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx, recID, str(digit1)+str(digit2)) res = run_sql(query) field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if field_number != field_number_old or field[:-1] != field_old[:-1]: if format.startswith("xm") or format == "marcxml": fieldid = encode_for_xml(field[0:3]) if field_number_old != -999: out += """ \n""" out += """ \n""" % \ (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2)) field_number_old = field_number field_old = field # print subfield value if format.startswith("xm") or format == "marcxml": value = encode_for_xml(value) out += """ %s\n""" % (encode_for_xml(field[-1:]), value) # all 
fields/subfields printed in this run, so close the tag: if (format.startswith("xm") or format == "marcxml") and field_number_old != -999: out += """ \n""" # we are at the end of printing the record: if format.startswith("xm") or format == "marcxml": out += " \n" elif format == "xd" or format == "oai_dc": # XML Dublin Core format, possibly OAI -- select only some bibXXx fields: out += """ \n""" if record_exist_p == -1: out += "" else: for f in get_fieldvalues(recID, "041__a"): out += " %s\n" % f for f in get_fieldvalues(recID, "100__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "700__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "245__a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "65017a"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "8564_u"): out += " %s\n" % encode_for_xml(f) for f in get_fieldvalues(recID, "520__a"): out += " %s\n" % encode_for_xml(f) out += " %s\n" % get_creation_date(recID) out += " \n" elif format.startswith("x_"): # underscore means that XML formats should be called on-the-fly; suitable for testing formats out += call_bibformat(recID, format) elif str(format[0:3]).isdigit(): # user has asked to print some fields only if format == "001": out += "%s\n" % (format, recID, format) else: vals = get_fieldvalues(recID, format) for val in vals: out += "%s\n" % (format, val, format) elif format.startswith('t'): ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += get_fieldvalues_alephseq_like(recID, "001,%s,980" % cfg_oaiidtag) else: out += get_fieldvalues_alephseq_like(recID, ot) elif format == "hm": if record_exist_p == -1: out += "
" + cgi.escape(get_fieldvalues_alephseq_like(recID, "001,%s,980" % cfg_oaiidtag)) + "
" else: out += "
" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot)) + "
" elif format.startswith("h") and ot: ## user directly asked for some tags to be displayed only if record_exist_p == -1: out += "
" + get_fieldvalues_alephseq_like(recID, "001,%s,980" % cfg_oaiidtag) + "
" else: out += "
" + get_fieldvalues_alephseq_like(recID, ot) + "
" elif format == "hd": # HTML detailed format if record_exist_p == -1: out += _("The record has been deleted.") else: # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so either call BibFormat on the fly or use default format # second, see if we are calling BibFormat on the fly: if cfg_call_bibformat: xfm = call_bibformat(recID, format) dom = minidom.parseString(xfm) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out += websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, weburl = weburl, ) elif format.startswith("hb_") or format.startswith("hd_"): # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format) elif format.startswith("hx"): # BibTeX format, called on the fly: if record_exist_p == -1: out += _("The record has been deleted.") else: out += call_bibformat(recID, format) elif format.startswith("hs"): # for citation/download similarity navigation links: if record_exist_p == -1: out += _("The record has been deleted.") else: out += """""" % (weburl, recID, ln) # firstly, title: titles = get_fieldvalues(recID, "245__a") if titles: for title in titles: out += "%s" % title else: # usual title not found, try conference title: titles = get_fieldvalues(recID, "111__a") if titles: for title in titles: out += "%s" % title else: # just print record ID: out += "%s %d" % (get_field_i18nname("record ID", ln), recID) out += "" # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += " - %s" 
% authors[0] if len(authors) > 1: out += " et al" # thirdly publication info: publinfos = get_fieldvalues(recID, "773__s") if not publinfos: publinfos = get_fieldvalues(recID, "909C4s") if not publinfos: publinfos = get_fieldvalues(recID, "037__a") if not publinfos: publinfos = get_fieldvalues(recID, "088__a") if publinfos: out += " - %s" % publinfos[0] else: # fourthly publication year (if not publication info): years = get_fieldvalues(recID, "773__y") if not years: years = get_fieldvalues(recID, "909C4y") if not years: years = get_fieldvalues(recID, "260__c") if years: out += " (%s)" % years[0] else: # HTML brief format by default if record_exist_p == -1: out += _("The record has been deleted.") else: query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query) if res: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: out += websearch_templates.tmpl_print_record_brief( ln = ln, recID = recID, weburl = weburl, ) # at the end of HTML brief mode, print the "Detailed record" functionality: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links( ln = ln, recID = recID, weburl = weburl, ) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += "
\n" out += " \n" return out def encode_for_xml(s): "Encode special chars in string so that it would be XML-compliant." s = string.replace(s, '&', '&') s = string.replace(s, '<', '<') return s def call_bibformat(id, otype="HD"): """Calls BibFormat for the record 'id'. Desired BibFormat output type is passed in 'otype' argument. This function is mainly used to display full format, if they are not stored in the 'bibfmt' table.""" pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % bindir, "otype=%s" % otype], 'rw') pipe_input.write(print_record(id, "xm")) pipe_input.close() out = pipe_output.read() pipe_output.close() pipe_error.close() return out def log_query(hostname, query_args, uid=-1): """Log query into the query and user_query tables.""" if uid > 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, - time.strftime("%04Y-%02m-%02d %02H:%02M:%02S", time.localtime()))) + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(logdir + "/search.log", "a") - log.write(time.strftime("%04Y%02m%02d%02H%02M%02S#", time.localtime())) + log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return def wash_url_argument(var, new_type): """Wash list argument into 'new_type', that can be 'list', 'str', or 'int'. 
Useful for washing mod_python passed arguments, that are all lists of strings (URL args may be multiple), but we sometimes want only to take the first value, and sometimes to represent it as string or numerical value.""" out = [] if new_type == 'list': # return lst if type(var) is list: out = var else: out = [var] elif new_type == 'str': # return str if type(var) is list: try: out = "%s" % var[0] except: out = "" elif type(var) is str: out = var else: out = "%s" % var elif new_type == 'int': # return int if type(var) is list: try: out = string.atoi(var[0]) except: out = 0 elif type(var) is int: out = var elif type(var) is str: try: out = string.atoi(var) except: out = 0 else: out = 0 return out ### CALLABLES def perform_request_search(req=None, cc=cdsname, c=None, p="", f="", rg="10", sf="", so="d", sp="", rm="", of="id", ot="", as="0", p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc="0", jrec="0", recid="-1", recidb="-1", sysno="", id="-1", idb="-1", sysnb="", action="", d1y="0", d1m="0", d1d="0", d2y="0", d2m="0", d2d="0", verbose="0", ap="0", ln=cdslang): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collectin list (e.g. ["Theses", "Books"]). The collections user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. "author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title"). so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer rm - ranking method (e.g. 
"jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. (Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. as - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). 
Governs whether we want to present the results in a single huge list, or splitted by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to CDSware from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recid, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1y - first date year (e.g. "1998"). Useful for search limits on creation date. d1m - first date month (e.g. "08"). Useful for search limits on creation date. d1d - first date day (e.g. "23"). Useful for search limits on creation date. d2y - second date year (e.g. "1998"). Useful for search limits on creation date. d2m - second date month (e.g. "09"). Useful for search limits on creation date. d2d - second date day (e.g. "02"). Useful for search limits on creation date. verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. 
""" # wash all passed arguments: cc = wash_url_argument(cc, 'str') sc = wash_url_argument(sc, 'int') (cc, colls_to_display, colls_to_search) = wash_colls(cc, c, sc) # which colls to search and to display? p = wash_pattern(wash_url_argument(p, 'str')) f = wash_field(wash_url_argument(f, 'str')) rg = wash_url_argument(rg, 'int') sf = wash_url_argument(sf, 'str') so = wash_url_argument(so, 'str') sp = wash_url_argument(sp, 'str') rm = wash_url_argument(rm, 'str') of = wash_url_argument(of, 'str') if type(ot) is list: ot = string.join(ot,",") ot = wash_url_argument(ot, 'str') as = wash_url_argument(as, 'int') p1 = wash_pattern(wash_url_argument(p1, 'str')) f1 = wash_field(wash_url_argument(f1, 'str')) m1 = wash_url_argument(m1, 'str') op1 = wash_url_argument(op1, 'str') p2 = wash_pattern(wash_url_argument(p2, 'str')) f2 = wash_field(wash_url_argument(f2, 'str')) m2 = wash_url_argument(m2, 'str') op2 = wash_url_argument(op2, 'str') p3 = wash_pattern(wash_url_argument(p3, 'str')) f3 = wash_field(wash_url_argument(f3, 'str')) m3 = wash_url_argument(m3, 'str') jrec = wash_url_argument(jrec, 'int') recid = wash_url_argument(recid, 'int') recidb = wash_url_argument(recidb, 'int') sysno = wash_url_argument(sysno, 'str') id = wash_url_argument(id, 'int') idb = wash_url_argument(idb, 'int') sysnb = wash_url_argument(sysnb, 'str') action = wash_url_argument(action, 'str') d1y = wash_url_argument(d1y, 'int') d1m = wash_url_argument(d1m, 'int') d1d = wash_url_argument(d1d, 'int') d2y = wash_url_argument(d2y, 'int') d2m = wash_url_argument(d2m, 'int') d2d = wash_url_argument(d2d, 'int') day1, day2 = wash_dates(d1y, d1m, d1d, d2y, d2m, d2d) verbose = wash_url_argument(verbose, 'int') ap = wash_url_argument(ap, 'int') ln = wash_language(ln) _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO 
deduce passed search limiting criterias (if applicable) pl, pl_in_url = "", "" # no limits by default if action != _("Browse") and req and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce MySQL recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) # deduce collection we are in (if applicable): if recid>0: cc = guess_primary_collection_of_a_record(recid) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid>0: ## 1 - detailed record display page_start(req, of, cc, as, ln, uid, _("Detailed record") + " #%d" % recid) if of == "hb": of = "hd" if record_exists(recid): if recidb<=recid: # sanity check recidb=recid+1 print_records(req, range(recid,recidb), -1, -9999, of, ot, ln) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.get_remote_host(apache.REMOTE_NOLOOKUP)) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of.startswith("h"): print_warning(req, "Requested record does not seem to exist.") elif action == _("Browse"): ## 2 - browse needed page_start(req, of, cc, as, ln, uid, _("Browse")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, action)) try: if as==1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg) browse_pattern(req, colls_to_search, p2, f2, rg) browse_pattern(req, colls_to_search, p3, f3, rg) else: browse_pattern(req, colls_to_search, p, f, rg) except: 
if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search needed page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cdsname), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cdsname, len(results_similar_recIDs), jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue) elif of=="id": return results_similar_recIDs else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif cfg_experimental_features and p.startswith("cocitedwith:"): ## 3-terter - cited by search needed page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, 
rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, "Requested record does not seem to exist.") if of == "id": return [] else: # record well exists, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cdsname, len(results_cocited_recIDs), jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln) elif of=="id": return results_cocited_recIDs else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] else: ## 3 - common search needed page_start(req, of, cc, as, ln, uid, _("Search Results")) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, as, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, action)) t1 = os.times()[4] results_in_any_collection = HitSet() if as == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln) if results_in_any_collection._nbhits == 0: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if p2: results_tmp = search_pattern(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln) if op1 == "a": # add results_in_any_collection.intersect(results_tmp) elif op1 == "o": # or results_in_any_collection.union(results_tmp) elif op1 == "n": # not 
results_in_any_collection.difference(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % op1, "Error") results_in_any_collection.calculate_nbhits() if results_in_any_collection._nbhits == 0: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if p3: results_tmp = search_pattern(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln) if op2 == "a": # add results_in_any_collection.intersect(results_tmp) elif op2 == "o": # or results_in_any_collection.union(results_tmp) elif op2 == "n": # not results_in_any_collection.difference(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % op2, "Error") results_in_any_collection.calculate_nbhits() except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) else: ## 3B - simple search try: results_in_any_collection = search_pattern(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if results_in_any_collection._nbhits == 0: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) # search_cache_key = p+"@"+f+"@"+string.join(colls_to_search,",") # if search_cache.has_key(search_cache_key): # is the result in search cache? # results_final = search_cache[search_cache_key] # else: # results_final = search_pattern(req, p, f, colls_to_search) # search_cache[search_cache_key] = results_final # if len(search_cache) > cfg_search_cache_size: # is the cache full? 
(sanity cleaning) # search_cache.clear() # search stage 4: intersection with collection universe: try: results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) # search stage 5: apply search option limits and restrictions: if day1 != "": try: results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(day1, day2), ap, aptext= _("No match within your time limits, " "discarding this condition..."), of=of) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if pl: try: results_final = intersect_results_with_hitset(req, results_final, search_pattern(req, pl, ap=0, ln=ln), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of) except: if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) if results_final == {}: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) t2 = os.times()[4] cpu_time = t2 - t1 ## search stage 6: display results: results_final_nb_total = 0 results_final_nb = {} # will hold number of records found in each collection # (in simple dict to display overview more easily; may refactor later) for coll in results_final.keys(): results_final_nb[coll] = results_final[coll]._nbhits results_final_nb_total += results_final_nb[coll] 
if results_final_nb_total == 0: if of.startswith('h'): print_warning(req, "No match found, please enter different search terms.") else: # yes, some hits found: good! # collection list may have changed due to not-exact-match-found policy so check it out: for coll in results_final.keys(): if coll not in colls_to_search: colls_to_search.append(coll) # print results overview: if of == "id": # we have been asked to return list of recIDs results_final_for_all_colls = HitSet() for coll in results_final.keys(): results_final_for_all_colls.union(results_final[coll]) recIDs = results_final_for_all_colls.items().tolist() if sf: # do we have to sort? recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if results_final_for_all_colls_rank_records_output[0]: recIDs = results_final_for_all_colls_rank_records_output[0] return recIDs elif of.startswith("h"): req.write(print_results_overview(req, colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln)) # print records: if len(colls_to_search)>1: cpu_time = -1 # we do not want to have search time printed on each collection for coll in colls_to_search: if results_final.has_key(coll) and results_final[coll]._nbhits: if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, cpu_time)) results_final_recIDs = results_final[coll].items() results_final_relevances = [] results_final_relevances_prologue = "" results_final_relevances_epilogue = "" if sf: # do we have to sort? results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of) elif rm: # do we have to rank? 
results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \ rank_records(rm, 0, results_final[coll], string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if of.startswith("h"): print_warning(req, results_final_comments) if results_final_recIDs_ranked: results_final_recIDs = results_final_recIDs_ranked else: # rank_records failed and returned some error message to display: print_warning(req, results_final_relevances_prologue) print_warning(req, results_final_relevances_epilogue) print_records(req, results_final_recIDs, jrec, rg, of, ot, ln, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue) if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, as, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, cpu_time, 1)) if f == "author" and of.startswith("h"): req.write(create_similarly_named_authors_link_box(p, ln)) # log query: try: log_query(req.get_remote_host(), req.args, uid) except: # do not log query if req is None (used by CLI interface) pass log_query_info("ss", p, f, colls_to_search, results_final_nb_total) ## 4 - write footer: if of.startswith("h"): req.write(create_google_box(cc, p, f, p1, p2, p3, ln)) return page_end(req, of, ln) def perform_request_cache(req, action="show"): """Manipulates the search engine cache.""" global search_cache global collection_reclist_cache global collection_reclist_cache_timestamp global field_i18nname_cache global field_i18nname_cache_timestamp global collection_i18nname_cache global collection_i18nname_cache_timestamp req.content_type = "text/html" req.send_http_header() out = "" out += "

Search Cache

" # clear cache if requested: if action == "clear": search_cache = {} collection_reclist_cache = create_collection_reclist_cache() # show collection reclist cache: out += "

Collection reclist cache

" res = run_sql("SHOW TABLE STATUS LIKE 'collection'") out += "- collection table last updated: %s" % str(res[0][11]) out += "
- reclist cache timestamp: %s" % collection_reclist_cache_timestamp out += "
- reclist cache contents:" out += "
" for coll in collection_reclist_cache.keys(): if collection_reclist_cache[coll]: out += "%s (%d)
" % (coll, get_collection_reclist(coll)._nbhits) out += "
" # show search cache: out += "

Search Cache

" out += "
" if len(search_cache): out += """""" out += "" % ("Pattern","Field","Collection","Number of Hits") for search_cache_key in search_cache.keys(): p, f, c = string.split(search_cache_key, "@", 2) # find out about length of cached data: l = 0 for coll in search_cache[search_cache_key]: l += search_cache[search_cache_key][coll]._nbhits out += "" % (p, f, c, l) out += "
%s%s%s%s
%s%s%s%d
" else: out += "

Search cache is empty." out += "

" out += """

clear cache""" % weburl # show field i18nname cache: out += "

Field I18N names cache

" res = run_sql("SHOW TABLE STATUS LIKE 'fieldname'") out += "- fieldname table last updated: %s" % str(res[0][11]) out += "
- i18nname cache timestamp: %s" % field_i18nname_cache_timestamp out += "
- i18nname cache contents:" out += "
" for field in field_i18nname_cache.keys(): for ln in field_i18nname_cache[field].keys(): out += "%s, %s = %s
" % (field, ln, field_i18nname_cache[field][ln]) out += "
" # show collection i18nname cache: out += "

Collection I18N names cache

" res = run_sql("SHOW TABLE STATUS LIKE 'collectionname'") out += "- collectionname table last updated: %s" % str(res[0][11]) out += "
- i18nname cache timestamp: %s" % collection_i18nname_cache_timestamp out += "
- i18nname cache contents:" out += "
" for coll in collection_i18nname_cache.keys(): for ln in collection_i18nname_cache[coll].keys(): out += "%s, %s = %s
" % (coll, ln, collection_i18nname_cache[coll][ln]) out += "
" req.write(out) return "\n" def perform_request_log(req, date=""): """Display search log information for given date.""" req.content_type = "text/html" req.send_http_header() req.write("

Search Log

") if date: # case A: display stats for a day yyyymmdd = string.atoi(date) req.write("

Date: %d

" % yyyymmdd) req.write("""""") req.write("" % ("No.","Time", "Pattern","Field","Collection","Number of Hits")) # read file: p = os.popen("grep ^%d %s/search.log" % (yyyymmdd,logdir), 'r') lines = p.readlines() p.close() # process lines: i = 0 for line in lines: try: datetime, as, p, f, c, nbhits = string.split(line,"#") i += 1 req.write("" \ % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits)) except: pass # ignore eventual wrong log lines req.write("
%s%s%s%s%s%s
#%d%s:%s:%s%s%s%s%s
") else: # case B: display summary stats per day - yyyymm01 = int(time.strftime("%04Y%02m01", time.localtime())) - yyyymmdd = int(time.strftime("%04Y%02m%02d", time.localtime())) + yyyymm01 = int(time.strftime("%Y%m01", time.localtime())) + yyyymmdd = int(time.strftime("%Y%m%d", time.localtime())) req.write("""""") req.write("" % ("Day", "Number of Queries")) for day in range(yyyymm01,yyyymmdd+1): p = os.popen("grep -c ^%d %s/search.log" % (day,logdir), 'r') for line in p.readlines(): req.write("""""" % (day, weburl,day,line)) p.close() req.write("
%s%s
%s%s
") return "\n" def profile(p="", f="", c=cdsname): """Profile search time.""" import profile import pstats profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile") p = pstats.Stats("perform_request_search_profile") p.strip_dirs().sort_stats("cumulative").print_stats() return 0 ## test cases: #print wash_colls(cdsname,"Library Catalogue", 0) #print wash_colls("Periodicals & Progress Reports",["Periodicals","Progress Reports"], 0) #print wash_field("wau") #print print_record(20,"tm","001,245") #print create_opft_search_units(None, "PHE-87-13","reportnumber") #print ":"+wash_pattern("* and % doo * %")+":\n" #print ":"+wash_pattern("*")+":\n" #print ":"+wash_pattern("ellis* ell* e*%")+":\n" #print run_sql("SELECT name,dbquery from collection") #print get_index_id("author") #print get_coll_ancestors("Theses") #print get_coll_sons("Articles & Preprints") #print get_coll_real_descendants("Articles & Preprints") #print get_collection_reclist("Theses") #print log(sys.stdin) #print search_unit_in_bibrec('2002-12-01','2002-12-12') #print wash_dates('1980', '', '28', '2003','02','') #print type(wash_url_argument("-1",'int')) #print get_nearest_terms_in_bibxxx("ellis", "author", 5, 5) #print call_bibformat(68, "HB_FLY") #print create_collection_i18nname_cache() ## profiling: #profile("of the this") #print perform_request_search(p="ellis") diff --git a/modules/websearch/lib/websearchadminlib.py b/modules/websearch/lib/websearchadminlib.py index ece3b510e..4fd2e7edb 100644 --- a/modules/websearch/lib/websearchadminlib.py +++ b/modules/websearch/lib/websearchadminlib.py @@ -1,3174 +1,3174 @@ ## $Id$ ## Administrator interface for WebSearch ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. 
## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """CDSware WebSearch Administrator Interface.""" import cgi import re import MySQLdb import Numeric import os import urllib import random import marshal import time from zlib import compress,decompress from mod_python import apache from cdsware.bibrankadminlib import write_outcome,modify_translations,get_def_name,get_i8n_name,get_name,get_rnk_nametypes,get_languages,check_user,is_adminuser,adderrorbox,addadminbox,tupletotable,tupletotable_onlyselected,addcheckboxes,createhiddenform,serialize_via_numeric_array_dumps,serialize_via_numeric_array_compr,serialize_via_numeric_array_escape,serialize_via_numeric_array,deserialize_via_numeric_array,serialize_via_marshal,deserialize_via_marshal from cdsware.dbquery import run_sql from cdsware.config import * from cdsware.webpage import page, pageheaderonly, pagefooteronly from cdsware.webuser import getUid, get_email __version__ = "$Id$" def getnavtrail(previous = ''): """Get the navtrail""" navtrail = """Admin Area > WebSearch Admin """ % (weburl, weburl) navtrail = navtrail + previous return navtrail def perform_modifytranslations(colID, ln, sel_type='', trans=[], confirm=-1, callback='yes'): """Modify the translations of a collection sel_type - the nametype to modify trans - the translations in the same order as the languages 
from get_languages()""" output = '' subtitle = '' cdslangs = get_languages() if confirm in ["2", 2] and colID: finresult = modify_translations(colID, cdslangs, sel_type, trans, "collection") col_dict = dict(get_def_name('', "collection")) if colID and col_dict.has_key(int(colID)): colID = int(colID) subtitle = """3. Modify translations for collection '%s'   [?]""" % (col_dict[colID], weburl) if type(trans) is str: trans = [trans] if sel_type == '': sel_type = get_col_nametypes()[0][0] header = ['Language', 'Translation'] actions = [] types = get_col_nametypes() if len(types) > 1: text = """ Name type """ output += createhiddenform(action="modifytranslations#3", text=text, button="Select", colID=colID, ln=ln, confirm=0) if confirm in [-1, "-1", 0, "0"]: trans = [] for (key, value) in cdslangs: try: trans_names = get_name(colID, key, sel_type, "collection") trans.append(trans_names[0][0]) except StandardError, e: trans.append('') for nr in range(0,len(cdslangs)): actions.append(["%s %s" % (cdslangs[nr][1], (cdslangs[nr][0]==cdslang and '(def)' or ''))]) actions[-1].append('' % trans[nr]) text = tupletotable(header=header, tuple=actions) output += createhiddenform(action="modifytranslations#3", text=text, button="Modify", colID=colID, sel_type=sel_type, ln=ln, confirm=2) if sel_type and len(trans) and confirm in ["2", 2]: output += write_outcome(finresult) try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_modifytranslations", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifyrankmethods(colID, ln, func='', rnkID='', confirm=0, callback='yes'): """Modify which rank methods is visible to the collection func - remove or add rank method rnkID - the id of the rank method.""" output = "" subtitle = "" col_dict = dict(get_def_name('', "collection")) rnk_dict = dict(get_def_name('', "rnkMETHOD")) if colID and col_dict.has_key(int(colID)): colID = int(colID) if func 
in ["0", 0] and confirm in ["1", 1]: finresult = attach_rnk_col(colID, rnkID) elif func in ["1", 1] and confirm in ["1", 1]: finresult = detach_rnk_col(colID, rnkID) subtitle = """9. Modify rank options for collection '%s'   [?]""" % (col_dict[colID], weburl) output = """

The rank methods enabled for the collection '%s' is:
""" % col_dict[colID] rnkmethods = get_col_rnk(colID, ln) output += """
""" if not rnkmethods: output += """No rank methods""" else: for id, name in rnkmethods: output += """%s, """ % name output += """
""" rnk_list = get_def_name('', "rnkMETHOD") rnk_dict_in_col = dict(get_col_rnk(colID, ln)) rnk_list = filter(lambda x: not rnk_dict_in_col.has_key(x[0]), rnk_list) if rnk_list: text = """ Enable: """ output += createhiddenform(action="modifyrankmethods#9", text=text, button="Enable", colID=colID, ln=ln, func=0, confirm=1) if confirm in ["1", 1] and func in ["0", 0] and int(rnkID) != -1: output += write_outcome(finresult) elif confirm not in ["0", 0] and func in ["0", 0]: output += """Please select a rank method.""" coll_list = get_col_rnk(colID, ln) if coll_list: text = """ Disable: """ output += createhiddenform(action="modifyrankmethods#9", text=text, button="Disable", colID=colID, ln=ln, func=1, confirm=1) if confirm in ["1", 1] and func in ["1", 1] and int(rnkID) != -1: output += write_outcome(finresult) elif confirm not in ["0", 0] and func in ["1", 1]: output += """Please select a rank method.""" try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_modifyrankmethods", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_addcollectiontotree(colID, ln, add_dad='', add_son='', rtype='', mtype='', callback='yes', confirm=-1): """Form to add a collection to the tree. add_dad - the dad to add the collection to add_son - the collection to add rtype - add it as a regular or virtual mtype - add it to the regular or virtual tree.""" output = "" output2 = "" subtitle = """Attach collection to tree   [?]""" % (weburl) col_dict = dict(get_def_name('', "collection")) if confirm not in [-1, "-1"] and not (add_son and add_dad and rtype): output2 += """All fields must be filled.

""" elif add_son and add_dad and rtype: add_son = int(add_son) add_dad = int(add_dad) if confirm not in [-1, "-1"]: if add_son == add_dad: output2 += """Cannot add a collection as a pointer to itself.

""" elif check_col(add_dad, add_son): res = add_col_dad_son(add_dad, add_son, rtype) output2 += write_outcome(res) if res[0] == 1: output2 += """
The collection will appear on your website after the next webcoll run. You can either run it manually or wait until bibsched does it for you.


""" else: output2 += """Cannot add the collection '%s' as a %s subcollection of '%s' since it will either create a loop, or the association already exists.

""" % (col_dict[add_son], (rtype=="r" and 'regular' or 'virtual'), col_dict[add_dad]) add_son = '' add_dad = '' rtype = '' tree = get_col_tree(colID) col_list = col_dict.items() col_list.sort(compare_on_val) output = show_coll_not_in_tree(colID, ln, col_dict) text = """ Attach which
Attach to
""" text += """ Relationship """ % ((rtype=="r" and 'selected="selected"' or ''), (rtype=="v" and 'selected="selected"' or '')) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/addcollectiontotree" % weburl, text=text, button="Add", colID=colID, ln=ln, confirm=1) output += output2 #output += perform_showtree(colID, ln) try: body = [output, extra] except NameError: body = [output] if callback: return perform_index(colID, ln, mtype="perform_addcollectiontotree", content=addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_addcollection(colID, ln, colNAME='', dbquery='', rest='', callback="yes", confirm=-1): """form to add a new collection. colNAME - the name of the new collection dbquery - the dbquery of the new collection rest - the group allowed to access the new collection""" output = "" subtitle = """Create new collection   [?]""" % (weburl) text = """ Default name
""" % colNAME output = createhiddenform(action="%s/admin/websearch/websearchadmin.py/addcollection" % weburl, text=text, colID=colID, ln=ln, button="Add collection", confirm=1) if colNAME and confirm in ["1", 1]: res = add_col(colNAME, '', '') output += write_outcome(res) if res[0] == 1: output += perform_addcollectiontotree(colID=colID, ln=ln, add_son=res[1], callback='') elif confirm not in ["-1", -1]: output += """Please give the collection a name.""" try: body = [output, extra] except NameError: body = [output] if callback: return perform_index(colID, ln=ln, mtype="perform_addcollection", content=addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifydbquery(colID, ln, dbquery='', callback='yes', confirm=-1): """form to modify the dbquery of the collection. dbquery - the dbquery of the collection.""" subtitle = '' output = "" col_dict = dict(get_def_name('', "collection")) if colID and col_dict.has_key(int(colID)): colID = int(colID) subtitle = """1. Modify collection query for collection '%s'   [?]""" % (col_dict[colID], weburl) if confirm == -1: res = run_sql("SELECT dbquery FROM collection WHERE id=%s" % colID) dbquery = res[0][0] if not dbquery: dbquery = '' reg_sons = len(get_col_tree(colID, 'r')) vir_sons = len(get_col_tree(colID, 'v')) if reg_sons > 1: if dbquery: output += "Warning: This collection got subcollections, and should because of this not have a collection query, for further explanation, check the WebSearch Guide
" elif reg_sons <= 1: if not dbquery: output += "Warning: This collection does not have any subcollections, and should because of this have a collection query, for further explanation, check the WebSearch Guide
" text = """ Query
""" % dbquery output += createhiddenform(action="modifydbquery", text=text, button="Modify", colID=colID, ln=ln, confirm=1) if confirm in ["1", 1]: res = modify_dbquery(colID, dbquery) if res: if dbquery == "": text = """Query removed for this collection.""" else: text = """Query set for this collection.""" else: text = """Sorry, could not change query.""" output += text try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_modifydbquery", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifyrestricted(colID, ln, rest='', callback='yes', confirm=-1): """modify which apache group is allowed to access the collection. rest - the groupname""" subtitle = '' output = "" col_dict = dict(get_def_name('', "collection")) if colID and col_dict.has_key(int(colID)): colID = int(colID) subtitle = """2. Modify access restrictions for collection '%s'   [?]""" % (col_dict[colID], weburl) if confirm == -1: res = run_sql("SELECT restricted FROM collection WHERE id=%s" % colID) rest = res[0][0] if not rest: rest = '' text = """ Restricted to:
""" % rest output += createhiddenform(action="modifyrestricted", text=text, button="Modify", colID=colID, ln=ln, confirm=1) if confirm in ["1", 1]: res = modify_restricted(colID, rest) if res: if rest == "": text = """Removed restriction for this collection.""" else: text = """Restriction set for this collection.""" else: text = """Sorry, could not change the access restrictions.""" output += text try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_modifyrestricted", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifycollectiontree(colID, ln, move_up='', move_down='', move_from='', move_to='', delete='', rtype='', callback='yes', confirm=0): """to modify the collection tree: move a collection up and down, delete a collection, or change the father of the collection. colID - the main collection of the tree, the root move_up - move this collection up (is not the collection id, but the place in the tree) move_up - move this collection down (is not the collection id, but the place in the tree) move_from - move this collection from the current positon (is not the collection id, but the place in the tree) move_to - move the move_from collection and set this as it's father. (is not the collection id, but the place in the tree) delete - delete this collection from the tree (is not the collection id, but the place in the tree) rtype - the type of the collection in the tree, regular or virtual""" colID = int(colID) tree = get_col_tree(colID, rtype) col_dict = dict(get_def_name('', "collection")) subtitle = """Modify collection tree: %s   [?]   Printer friendly version""" % (col_dict[colID], weburl, weburl, colID, ln) fin_output = "" output = "" try: if move_up: move_up = int(move_up) switch = find_last(tree, move_up) if switch and switch_col_treescore(tree[move_up], tree[switch]): output += """Moved the %s collection '%s' up and '%s' down.

""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]]) else: output += """Could not move the %s collection '%s' up and '%s' down.

""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]]) elif move_down: move_down = int(move_down) switch = find_next(tree, move_down) if switch and switch_col_treescore(tree[move_down], tree[switch]): output += """Moved the %s collection '%s' down and '%s' up.

""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_down][0]], col_dict[tree[switch][0]]) else: output += """Could not move the %s collection '%s' up and '%s' down.

""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]],col_dict[tree[switch][0]]) elif delete: delete = int(delete) if confirm in [0, "0"]: if col_dict[tree[delete][0]] != col_dict[tree[delete][3]]: text = """Do you want to remove the %s collection '%s' and its subcollections in the %s collection '%s'. """ % ((tree[delete][4]=="r" and 'regular' or 'virtual'), col_dict[tree[delete][0]], (rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]]) else: text = """Do you want to remove all subcollections of the %s collection '%s'. """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]]) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifycollectiontree#tree" % weburl, text=text, button="Confirm", colID=colID, delete=delete, rtype=rtype, ln=ln, confirm=1) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % weburl, text="To cancel", button="Cancel", colID=colID, ln=ln) else: if remove_col_subcol(tree[delete][0], tree[delete][3], rtype): if col_dict[tree[delete][0]] != col_dict[tree[delete][3]]: output += """Removed the %s collection '%s' and its subcollections in subdirectory '%s'.

""" % ((tree[delete][4]=="r" and 'regular' or 'virtual'), col_dict[tree[delete][0]], col_dict[tree[delete][3]]) else: output += """Removed the subcollections of the %s collection '%s'.

""" % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]]) else: output += """Could not remove the collection from the tree.

""" delete = '' elif move_from and not move_to: move_from_rtype = move_from[0] move_from_id = int(move_from[1:len(move_from)]) text = """Select collection to place the %s collection '%s' under.

""" % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_from_id][0]]) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % weburl, text=text, button="Cancel", colID=colID, ln=ln) elif move_from and move_to: move_from_rtype = move_from[0] move_from_id = int(move_from[1:len(move_from)]) move_to_rtype = move_to[0] move_to_id = int(move_to[1:len(move_to)]) tree_from = get_col_tree(colID, move_from_rtype) tree_to = get_col_tree(colID, move_to_rtype) if confirm in [0, '0']: if move_from_id == move_to_id and move_from_rtype==move_to_rtype: output += """Cannot move to itself.

""" elif tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype==move_to_rtype: output += """The collection is already there.

""" elif check_col(tree_to[move_to_id][0], tree_from[move_from_id][0]) or (tree_to[move_to_id][0] == 1 and tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype != move_to_rtype): text = """Move %s collection '%s' to the %s collection '%s'. """ % ((tree_from[move_from_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (tree_to[move_to_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]]) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifycollectiontree#tree" % weburl, text=text, button="Confirm", colID=colID, move_from=move_from, move_to=move_to, ln=ln, rtype=rtype, confirm=1) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % weburl, text="""To cancel""", button="Cancel", colID=colID, ln=ln) else: output += """Cannot move the collection '%s' and set it as a subcollection of '%s' since it will create a loop.

""" % (col_dict[tree_from[move_from_id][0]], col_dict[tree_to[move_to_id][0]]) else: if (move_to_id != 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id])) or (move_to_id == 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id], move_to_rtype)): output += """Moved %s collection '%s' to the %s collection '%s'.

""" % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]]) else: output += """Could not move %s collection '%s' to the %s collection '%s'.

""" % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]]) move_from = '' move_to = '' else: output += """ """ except StandardError, e: return """An error occured. """ output += """
Narrow by collection: Focus on...:
""" tree = get_col_tree(colID, 'r') output += create_colltree(tree, col_dict, colID, ln, move_from, move_to, 'r', "yes") output += """ """ tree = get_col_tree(colID, 'v') output += create_colltree(tree, col_dict, colID, ln, move_from, move_to, 'v', "yes") output += """
""" try: body = [output, extra] except NameError: body = [output] if callback: return perform_index(colID, ln, mtype="perform_modifycollectiontree", content=addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_showtree(colID, ln): """create collection tree/hiarchy""" col_dict = dict(get_def_name('', "collection")) subtitle = "Collection tree: %s" % col_dict[int(colID)] output = """
Narrow by collection: Focus on...:
""" tree = get_col_tree(colID, 'r') output += create_colltree(tree, col_dict, colID, ln, '', '', 'r', '') output += """ """ tree = get_col_tree(colID, 'v') output += create_colltree(tree, col_dict, colID, ln, '', '', 'v', '') output += """
""" try: body = [output, extra] except NameError: body = [output] return addadminbox(subtitle, body) def perform_addportalbox(colID, ln, title='', body='', callback='yes', confirm=-1): """form to add a new portalbox title - the title of the portalbox body - the body of the portalbox""" col_dict = dict(get_def_name('', "collection")) colID = int(colID) subtitle = """Create new portalbox""" text = """ Title
Body
""" % (cgi.escape(title), cgi.escape(body)) output = createhiddenform(action="addportalbox#5.1", text=text, button="Add", colID=colID, ln=ln, confirm=1) if body and confirm in [1, "1"]: title = cgi.escape(title) body = cgi.escape(body) res = add_pbx(title, body) output += write_outcome(res) if res[1] == 1: output += """Add portalbox to collection""" % (colID, ln, res[1]) elif confirm not in [-1, "-1"]: output += """Body field must be filled. """ try: body = [output, extra] except NameError: body = [output] return perform_showportalboxes(colID, ln, content=addadminbox(subtitle, body)) def perform_addexistingportalbox(colID, ln, pbxID=-1, score=0, position='', sel_ln='', callback='yes', confirm=-1): """form to add an existing portalbox to a collection. colID - the collection to add the portalbox to pbxID - the portalbox to add score - the importance of the portalbox. position - the position of the portalbox on the page sel_ln - the language of the portalbox""" subtitle = """Add existing portalbox to collection""" output = "" colID = int(colID) res = get_pbx() pos = get_pbx_pos() lang = dict(get_languages()) col_dict = dict(get_def_name('', "collection")) pbx_dict = dict(map(lambda x: (x[0], x[1]), res)) col_pbx = get_col_pbx(colID) col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx)) if len(res) > 0: text = """ Portalbox
Language
Position " output += createhiddenform(action="addexistingportalbox#5.2", text=text, button="Add", colID=colID, ln=ln, confirm=1) else: output = """No existing portalboxes to add, please create a new one. """ if pbxID > -1 and position and sel_ln and confirm in [1, "1"]: pbxID = int(pbxID) res = add_col_pbx(colID, pbxID, sel_ln, position, '') output += write_outcome(res) elif pbxID > -1 and confirm not in [-1, "-1"]: output += """All fields must be filled. """ try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showportalboxes(colID, ln, content=output) def perform_deleteportalbox(colID, ln, pbxID=-1, callback='yes', confirm=-1): """form to delete a portalbox which is not in use. colID - the current collection. pbxID - the id of the portalbox""" subtitle = """Delete an unused portalbox""" output = "" colID = int(colID) if pbxID not in [-1," -1"] and confirm in [1, "1"]: ares = get_pbx() pbx_dict = dict(map(lambda x: (x[0], x[1]), ares)) if pbx_dict.has_key(int(pbxID)): pname = pbx_dict[int(pbxID)] ares = delete_pbx(int(pbxID)) else: return """This portalbox does not exist""" res = get_pbx() col_dict = dict(get_def_name('', "collection")) pbx_dict = dict(map(lambda x: (x[0], x[1]), res)) col_pbx = get_col_pbx() col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx)) if len(res) > 0: text = """ Portalbox
""" output += createhiddenform(action="deleteportalbox#5.3", text=text, button="Delete", colID=colID, ln=ln, confirm=1) if pbxID not in [-1,"-1"]: pbxID = int(pbxID) if confirm in [1, "1"]: output += write_outcome(ares) elif confirm not in [-1, "-1"]: output += """Choose a portalbox to delete. """ try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showportalboxes(colID, ln, content=output) def perform_modifyportalbox(colID, ln, pbxID=-1, score='', position='', sel_ln='', title='', body='', callback='yes', confirm=-1): """form to modify a portalbox in a collection, or change the portalbox itself. colID - the id of the collection. pbxID - the portalbox to change score - the score of the portalbox connected to colID which should be changed. position - the position of the portalbox in collection colID to change.""" subtitle = "" output = "" colID = int(colID) res = get_pbx() pos = get_pbx_pos() lang = dict(get_languages()) col_dict = dict(get_def_name('', "collection")) pbx_dict = dict(map(lambda x: (x[0], x[1]), res)) col_pbx = get_col_pbx(colID) col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx)) if pbxID not in [-1, "-1"]: pbxID = int(pbxID) subtitle = """Modify portalbox '%s' for this collection""" % pbx_dict[pbxID] col_pbx = get_col_pbx(colID) if not (score and position) and not (body and title): for (id_pbx, id_collection, tln, score, position, title, body) in col_pbx: if id_pbx == pbxID: break output += """Collection (presentation) specific values (Changes implies only to this collection.)
""" text = """ Position
""" output += createhiddenform(action="modifyportalbox#5.4", text=text, button="Modify", colID=colID, pbxID=pbxID, score=score, title=title, body=cgi.escape(body, 1), sel_ln=sel_ln, ln=ln, confirm=3) if pbxID > -1 and score and position and confirm in [3, "3"]: pbxID = int(pbxID) res = modify_pbx(colID, pbxID, sel_ln, score, position, '', '') res2 = get_pbx() pbx_dict = dict(map(lambda x: (x[0], x[1]), res2)) output += write_outcome(res) output += """
Portalbox (content) specific values (any changes appears everywhere the portalbox is used.)""" text = """ Title
""" % cgi.escape(title) text += """ Body
""" % cgi.escape(body) output += createhiddenform(action="modifyportalbox#5.4", text=text, button="Modify", colID=colID, pbxID=pbxID, sel_ln=sel_ln, score=score, position=position, ln=ln, confirm=4) if pbxID > -1 and confirm in [4, "4"]: pbxID = int(pbxID) res = modify_pbx(colID, pbxID, sel_ln, '', '', title, body) output += write_outcome(res) else: output = """No portalbox to modify.""" try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showportalboxes(colID, ln, content=output) def perform_switchpbxscore(colID, id_1, id_2, sel_ln, ln): """Switch the score of id_1 and id_2 in collection_portalbox. colID - the current collection id_1/id_2 - the id's to change the score for. sel_ln - the language of the portalbox""" res = get_pbx() pbx_dict = dict(map(lambda x: (x[0], x[1]), res)) res = switch_pbx_score(colID, id_1, id_2, sel_ln) output += write_outcome(res) return perform_showportalboxes(colID, ln, content=output) def perform_showportalboxes(colID, ln, callback='yes', content='', confirm=-1): """show the portalboxes of this collection. colID - the portalboxes to show the collection for.""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) subtitle = """5. Modify portalboxes for collection '%s'   [?]""" % (col_dict[colID], weburl) output = "" pos = get_pbx_pos() output = """
Portalbox actions (not related to this collection)
Create new portalbox
Delete an unused portalbox
Collection specific actions
Add existing portalbox to collection
""" % (colID, ln, colID, ln, colID, ln) header = ['Position', 'Language', '', 'Title', 'Actions'] actions = [] cdslang = get_languages() lang = dict(cdslang) pos_list = pos.items() pos_list.sort() if len(get_col_pbx(colID)) > 0: for (key, value) in cdslang: for (pos_key, pos_value) in pos_list: res = get_col_pbx(colID, key, pos_key) i = 0 for (pbxID, colID_pbx, tln, score, position, title, body) in res: move = """
""" if i != 0: move += """""" % (weburl, colID, ln, pbxID, res[i - 1][0], tln, random.randint(0, 1000), weburl) else: move += "   " move += "" i += 1 if i != len(res): move += """""" % (weburl, colID, ln, pbxID, res[i][0], tln, random.randint(0, 1000), weburl) move += """
""" actions.append(["%s" % (i==1 and pos[position] or ''), "%s" % (i==1 and lang[tln] or ''), move, "%s" % title]) for col in [(('Modify', 'modifyportalbox'), ('Remove', 'removeportalbox'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, pbxID, tln, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, pbxID, tln, str) output += tupletotable(header=header, tuple=actions) else: output += """No portalboxes exists for this collection""" output += content try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_showportalboxes", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_removeportalbox(colID, ln, pbxID='', sel_ln='', callback='yes', confirm=0): """form to remove a portalbox from a collection. colID - the current collection, remove the portalbox from this collection. sel_ln - remove the portalbox with this language pbxID - remove the portalbox with this id""" subtitle = """Remove portalbox""" output = "" col_dict = dict(get_def_name('', "collection")) res = get_pbx() pbx_dict = dict(map(lambda x: (x[0], x[1]), res)) if colID and pbxID and sel_ln: colID = int(colID) pbxID = int(pbxID) if confirm in ["0", 0]: text = """Do you want to remove the portalbox '%s' from the collection '%s'.""" % (pbx_dict[pbxID], col_dict[colID]) output += createhiddenform(action="removeportalbox#5.5", text=text, button="Confirm", colID=colID, pbxID=pbxID, sel_ln=sel_ln, confirm=1) elif confirm in ["1", 1]: res = remove_pbx(colID, pbxID, sel_ln) output += write_outcome(res) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showportalboxes(colID, ln, content=output) def perform_switchfmtscore(colID, type, id_1, id_2, ln): """Switch the score of id_1 and id_2 in the table type. colID - the current collection id_1/id_2 - the id's to change the score for. type - like "format" """ fmt_dict = dict(get_def_name('', "format")) res = switch_score(colID, id_1, id_2, type) output = write_outcome(res) return perform_showoutputformats(colID, ln, content=output) def perform_switchfldscore(colID, id_1, id_2, fmeth, ln): """Switch the score of id_1 and id_2 in collection_field_fieldvalue. colID - the current collection id_1/id_2 - the id's to change the score for.""" fld_dict = dict(get_def_name('', "field")) res = switch_fld_score(colID, id_1, id_2) output = write_outcome(res) if fmeth == "soo": return perform_showsortoptions(colID, ln, content=output) elif fmeth == "sew": return perform_showsearchfields(colID, ln, content=output) elif fmeth == "seo": return perform_showsearchoptions(colID, ln, content=output) def perform_switchfldvaluescore(colID, id_1, id_fldvalue_1, id_fldvalue_2, ln): """Switch the score of id_1 and id_2 in collection_field_fieldvalue. colID - the current collection id_1/id_2 - the id's to change the score for.""" name_1 = run_sql("SELECT name from fieldvalue where id=%s" % id_fldvalue_1)[0][0] name_2 = run_sql("SELECT name from fieldvalue where id=%s" % id_fldvalue_2)[0][0] res = switch_fld_value_score(colID, id_1, id_fldvalue_1, id_fldvalue_2) output = write_outcome(res) return perform_modifyfield(colID, fldID=id_1, ln=ln, content=output) def perform_addnewfieldvalue(colID, fldID, ln, name='', value='', callback="yes", confirm=-1): """form to add a new fieldvalue. name - the name of the new fieldvalue value - the value of the new fieldvalue """ output = "" subtitle = """Add new value""" text = """ Display name
Search value
""" % (name, value) output = createhiddenform(action="%s/admin/websearch/websearchadmin.py/addnewfieldvalue" % weburl, text=text, colID=colID, fldID=fldID, ln=ln, button="Add", confirm=1) if name and value and confirm in ["1", 1]: res = add_fldv(name, value) output += write_outcome(res) if res[0] == 1: res = add_col_fld(colID, fldID, 'seo', res[1]) if res[0] == 0: output += "
" + write_outcome(res) elif confirm not in ["-1", -1]: output += """Please fill in name and value. """ try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output) def perform_modifyfieldvalue(colID, fldID, fldvID, ln, name='', value='', callback="yes", confirm=-1): """form to modify a fieldvalue. name - the name of the fieldvalue value - the value of the fieldvalue """ if confirm in [-1, "-1"]: res = get_fld_value(fldvID) (id, name, value) = res[0] output = "" subtitle = """Modify existing value""" output = """
Warning: Modifications done below will also inflict on all places the modified data is used.
""" text = """ Display name
Search value
""" % (name, value) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifyfieldvalue" % weburl, text=text, colID=colID, fldID=fldID, fldvID=fldvID, ln=ln, button="Update", confirm=1) output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifyfieldvalue" % weburl, text="Delete value and all associations", colID=colID, fldID=fldID, fldvID=fldvID, ln=ln, button="Delete", confirm=2) if name and value and confirm in ["1", 1]: res = update_fldv(fldvID, name, value) output += write_outcome(res) #if res: # output += """Operation successfully completed.""" #else: # output += """Operation failed.""" elif confirm in ["2", 2]: res = delete_fldv(fldvID) output += write_outcome(res) elif confirm not in ["-1", -1]: output += """Please fill in name and value.""" try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output) def perform_removefield(colID, ln, fldID='', fldvID='', fmeth='', callback='yes', confirm=0): """form to remove a field from a collection. colID - the current collection, remove the field from this collection. sel_ln - remove the field with this language fldID - remove the field with this id""" if fmeth == "soo": field = "sort option" elif fmeth == "sew": field = "search field" elif fmeth == "seo": field = "search option" else: field = "field" subtitle = """Remove %s""" % field output = "" col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) res = get_fld_value() fldv_dict = dict(map(lambda x: (x[0], x[1]), res)) if colID and fldID: colID = int(colID) fldID = int(fldID) if fldvID and fldvID != "None": fldvID = int(fldvID) if confirm in ["0", 0]: text = """Do you want to remove the %s '%s' %s from the collection '%s'.""" % (field, fld_dict[fldID], (fldvID not in["", "None"] and "with value '%s'" % fldv_dict[fldvID] or ''), col_dict[colID]) output += createhiddenform(action="removefield#6.5", text=text, button="Confirm", colID=colID, fldID=fldID, fldvID=fldvID, fmeth=fmeth, confirm=1) elif confirm in ["1", 1]: res = remove_fld(colID, fldID, fldvID) output += write_outcome(res) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) if fmeth == "soo": return perform_showsortoptions(colID, ln, content=output) elif fmeth == "sew": return perform_showsearchfields(colID, ln, content=output) elif fmeth == "seo": return perform_showsearchoptions(colID, ln, content=output) def perform_removefieldvalue(colID, ln, fldID='', fldvID='', fmeth='', callback='yes', confirm=0): """form to remove a field from a collection. colID - the current collection, remove the field from this collection. sel_ln - remove the field with this language fldID - remove the field with this id""" subtitle = """Remove value""" output = "" col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) res = get_fld_value() fldv_dict = dict(map(lambda x: (x[0], x[1]), res)) if colID and fldID: colID = int(colID) fldID = int(fldID) if fldvID and fldvID != "None": fldvID = int(fldvID) if confirm in ["0", 0]: text = """Do you want to remove the value '%s' from the search option '%s'.""" % (fldv_dict[fldvID], fld_dict[fldID]) output += createhiddenform(action="removefieldvalue#7.4", text=text, button="Confirm", colID=colID, fldID=fldID, fldvID=fldvID, fmeth=fmeth, confirm=1) elif confirm in ["1", 1]: res = remove_fld(colID, fldID, fldvID) output += write_outcome(res) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output) def perform_rearrangefieldvalue(colID, fldID, ln, callback='yes', confirm=-1): """rearrang the fieldvalues alphabetically colID - the collection fldID - the field to rearrange the fieldvalue for """ subtitle = "Order values alphabetically" output = "" col_fldv = get_col_fld(colID, 'seo', fldID) col_fldv = dict(map(lambda x: (x[1], x[0]), col_fldv)) fldv_names = get_fld_value() fldv_names = map(lambda x: (x[0], x[1]), fldv_names) if not col_fldv.has_key(None): vscore = len(col_fldv) for (fldvID, name) in fldv_names: if col_fldv.has_key(fldvID): run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s" % (vscore, colID, fldID, fldvID)) vscore -= 1 output += write_outcome((1, "")) else: output += write_outcome((0, (0, "No values to order"))) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_modifyfield(colID, fldID, ln, content=output) def perform_rearrangefield(colID, ln, fmeth, callback='yes', confirm=-1): """rearrang the fields alphabetically colID - the collection """ subtitle = "Order fields alphabetically" output = "" col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, fmeth))) fld_names = get_def_name('', "field") if len(col_fld) > 0: score = len(col_fld) for (fldID, name) in fld_names: if col_fld.has_key(fldID): run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s" % (score, colID, fldID)) score -= 1 output += write_outcome((1, "")) else: output += write_outcome((0, (0, "No fields to order"))) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) if fmeth == "soo": return perform_showsortoptions(colID, ln, content=output) elif fmeth == "sew": return perform_showsearchfields(colID, ln, content=output) elif fmeth == "seo": return perform_showsearchoptions(colID, ln, content=output) def perform_addexistingfieldvalue(colID, fldID, fldvID=-1, ln=cdslang, callback='yes', confirm=-1): """form to add an existing fieldvalue to a field. colID - the collection fldID - the field to add the fieldvalue to fldvID - the fieldvalue to add""" subtitle = """Add existing value to search option""" output = "" if fldvID not in [-1, "-1"] and confirm in [1, "1"]: fldvID = int(fldvID) ares = add_col_fld(colID, fldID, 'seo', fldvID) colID = int(colID) fldID = int(fldID) lang = dict(get_languages()) res = get_def_name('', "field") col_dict = dict(get_def_name('', "collection")) fld_dict = dict(res) col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, 'seo'))) fld_value = get_fld_value() fldv_dict = dict(map(lambda x: (x[0], x[1]), fld_value)) text = """ Value
""" output += createhiddenform(action="addexistingfieldvalue#7.4", text=text, button="Add", colID=colID, fldID=fldID, ln=ln, confirm=1) if fldvID not in [-1, "-1"] and confirm in [1, "1"]: output += write_outcome(ares) elif confirm in [1, "1"]: output += """Select a value to add and try again.""" try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_modifyfield(colID, fldID, ln, content=output) def perform_addexistingfield(colID, ln, fldID=-1, fldvID=-1, fmeth='', callback='yes', confirm=-1): """form to add an existing field to a collection. colID - the collection to add the field to fldID - the field to add sel_ln - the language of the field""" subtitle = """Add existing field to collection""" output = "" if fldID not in [-1, "-1"] and confirm in [1, "1"]: fldID = int(fldID) ares = add_col_fld(colID, fldID, fmeth, fldvID) colID = int(colID) lang = dict(get_languages()) res = get_def_name('', "field") col_dict = dict(get_def_name('', "collection")) fld_dict = dict(res) col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, fmeth))) fld_value = get_fld_value() fldv_dict = dict(map(lambda x: (x[0], x[1]), fld_value)) if fldvID: fldvID = int(fldvID) text = """ Field
""" output += createhiddenform(action="addexistingfield#6.2", text=text, button="Add", colID=colID, fmeth=fmeth, ln=ln, confirm=1) if fldID not in [-1, "-1"] and confirm in [1, "1"]: output += write_outcome(ares) elif fldID in [-1, "-1"] and confirm not in [-1, "-1"]: output += """Select a field. """ try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) if fmeth == "soo": return perform_showsortoptions(colID, ln, content=output) elif fmeth == "sew": return perform_showsearchfields(colID, ln, content=output) elif fmeth == "seo": return perform_showsearchoptions(colID, ln, content=output) def perform_showsortoptions(colID, ln, callback='yes', content='', confirm=-1): """show the sort fields of this collection..""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) fld_type = get_sort_nametypes() subtitle = """8. Modify sort options for collection '%s'   [?]""" % (col_dict[colID], weburl) output = """
Field actions (not related to this collection)
Go to the BibIndex interface to modify the available sort options
Collection specific actions
Add sort option to collection
Order sort options alphabetically
""" % (colID, ln, colID, ln) header = ['', 'Sort option', 'Actions'] actions = [] cdslang = get_languages() lang = dict(cdslang) fld_type_list = fld_type.items() if len(get_col_fld(colID, 'soo')) > 0: res = get_col_fld(colID, 'soo') i = 0 for (fldID, fldvID, stype, score, score_fieldvalue) in res: move = """
""" if i != 0: move += """""" % (weburl, colID, ln, fldID, res[i - 1][0], random.randint(0, 1000), weburl) else: move += "    " move += "" i += 1 if i != len(res): move += """""" % (weburl, colID, ln, fldID, res[i][0], random.randint(0, 1000), weburl) move += """
""" actions.append([move, fld_dict[int(fldID)]]) for col in [(('Remove sort option', 'removefield'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, fldID, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, fldID, str) output += tupletotable(header=header, tuple=actions) else: output += """No sort options exists for this collection""" output += content try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_showsortoptions", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_showsearchfields(colID, ln, callback='yes', content='', confirm=-1): """show the search fields of this collection..""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) fld_type = get_sort_nametypes() subtitle = """6. Modify search fields for collection '%s'   [?]""" % (col_dict[colID], weburl) output = """
Field actions (not related to this collection)
Go to the BibIndex interface to modify the available search fields
Collection specific actions
Add search field to collection
Order search fields alphabetically
""" % (colID, ln, colID, ln) header = ['', 'Search field', 'Actions'] actions = [] cdslang = get_languages() lang = dict(cdslang) fld_type_list = fld_type.items() if len(get_col_fld(colID, 'sew')) > 0: res = get_col_fld(colID, 'sew') i = 0 for (fldID, fldvID, stype, score, score_fieldvalue) in res: move = """
""" if i != 0: move += """""" % (weburl, colID, ln, fldID, res[i - 1][0], random.randint(0, 1000), weburl) else: move += "   " move += "" i += 1 if i != len(res): move += '' % (weburl, colID, ln, fldID, res[i][0], random.randint(0, 1000), weburl) move += """
""" actions.append([move, fld_dict[int(fldID)]]) for col in [(('Remove search field', 'removefield'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, fldID, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, fldID, str) output += tupletotable(header=header, tuple=actions) else: output += """No search fields exists for this collection""" output += content try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_showsearchfields", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_showsearchoptions(colID, ln, callback='yes', content='', confirm=-1): """show the sort and search options of this collection..""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) fld_type = get_sort_nametypes() subtitle = """7. Modify search options for collection '%s'   [?]""" % (col_dict[colID], weburl) output = """
Field actions (not related to this collection)
Go to the BibIndex interface to modify the available search options
Collection specific actions
Add search option to collection
Order search options alphabetically
""" % (colID, ln, colID, ln) header = ['', 'Search option', 'Actions'] actions = [] cdslang = get_languages() lang = dict(cdslang) fld_type_list = fld_type.items() fld_distinct = run_sql("SELECT distinct(id_field) FROM collection_field_fieldvalue WHERE type='seo' AND id_collection=%s ORDER by score desc" % colID) if len(fld_distinct) > 0: i = 0 for (id) in fld_distinct: fldID = id[0] col_fld = get_col_fld(colID, 'seo', fldID) move = "" if i != 0: move += """""" % (weburl, colID, ln, fldID, fld_distinct[i - 1][0], random.randint(0, 1000), weburl) else: move += "   " i += 1 if i != len(fld_distinct): move += '' % (weburl, colID, ln, fldID, fld_distinct[i][0], random.randint(0, 1000), weburl) actions.append([move, "%s" % fld_dict[fldID]]) for col in [(('Modify values', 'modifyfield'), ('Remove search option', 'removefield'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, fldID, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, fldID, str) output += tupletotable(header=header, tuple=actions) else: output += """No search options exists for this collection""" output += content try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_showsearchoptions", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifyfield(colID, fldID, fldvID='', ln=cdslang, content='',callback='yes', confirm=0): """Modify the fieldvalues for a field""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) fld_dict = dict(get_def_name('', "field")) fld_type = get_sort_nametypes() fldID = int(fldID) subtitle = """Modify values for field '%s'""" % (fld_dict[fldID]) output = """
Value specific actions
Add existing value to search option
Add new value to search option
Order values alphabetically
""" % (colID, ln, fldID, colID, ln, fldID, colID, ln, fldID) header = ['', 'Value name', 'Actions'] actions = [] cdslang = get_languages() lang = dict(cdslang) fld_type_list = fld_type.items() col_fld = list(get_col_fld(colID, 'seo', fldID)) if len(col_fld) == 1 and col_fld[0][1] == None: output += """No values added for this search option yet""" else: j = 0 for (fldID, fldvID, stype, score, score_fieldvalue) in col_fld: fieldvalue = get_fld_value(fldvID) move = "" if j != 0: move += """""" % (weburl, colID, ln, fldID, fldvID, col_fld[j - 1][1], random.randint(0, 1000), weburl) else: move += "   " j += 1 if j != len(col_fld): move += """""" % (weburl, colID, ln, fldID, fldvID, col_fld[j][1], random.randint(0, 1000), weburl) if fieldvalue[0][1] != fieldvalue[0][2] and fldvID != None: actions.append([move, "%s - %s" % (fieldvalue[0][1],fieldvalue[0][2])]) elif fldvID != None: actions.append([move, "%s" % fieldvalue[0][1]]) move = '' for col in [(('Modify value', 'modifyfieldvalue'), ('Remove value', 'removefieldvalue'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, fldID, fldvID, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, fldID, fldvID, str) output += tupletotable(header=header, tuple=actions) output += content try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) if len(col_fld) == 0: output = content return perform_showsearchoptions(colID, ln, content=output) def perform_showoutputformats(colID, ln, callback='yes', content='', confirm=-1): """shows the outputformats of the current collection colID - the collection id.""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) subtitle = """10. Modify output formats for collection '%s'   [?]""" % (col_dict[colID], weburl) output = """
Output format actions (not specific to the chosen collection)
Go to the BibFormat interface to modify
Collection specific actions
Add existing output format to collection
""" % (colID, ln) header = ['', 'Code', 'Output format', 'Actions'] actions = [] col_fmt = get_col_fmt(colID) fmt_dict = dict(get_def_name('', "format")) i = 0 if len(col_fmt) > 0: for (id_format, colID_fld, code, score) in col_fmt: move = """
""" if i != 0: move += """""" % (weburl, colID, ln, id_format, col_fmt[i - 1][0], random.randint(0, 1000), weburl) else: move += "   " move += "" i += 1 if i != len(col_fmt): move += '' % (weburl, colID, ln, id_format, col_fmt[i][0], random.randint(0, 1000), weburl) move += """
""" actions.append([move, code, fmt_dict[int(id_format)]]) for col in [(('Remove', 'removeoutputformat'),)]: actions[-1].append('%s' % (weburl, col[0][1], colID, ln, id_format, col[0][0])) for (str, function) in col[1:]: actions[-1][-1] += ' / %s' % (weburl, function, colID, ln, id_format, str) output += tupletotable(header=header, tuple=actions) else: output += """No output formats exists for this collection""" output += content try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_showoutputformats", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_addexistingoutputformat(colID, ln, fmtID=-1, callback='yes', confirm=-1): """form to add an existing output format to a collection. colID - the collection the format should be added to fmtID - the format to add.""" subtitle = """Add existing output format to collection""" output = "" if fmtID not in [-1, "-1"] and confirm in [1, "1"]: ares = add_col_fmt(colID, fmtID) colID = int(colID) res = get_def_name('', "format") fmt_dict = dict(res) col_dict = dict(get_def_name('', "collection")) col_fmt = get_col_fmt(colID) col_fmt = dict(map(lambda x: (x[0], x[2]), col_fmt)) if len(res) > 0: text = """ Output format
""" output += createhiddenform(action="addexistingoutputformat#10.2", text=text, button="Add", colID=colID, ln=ln, confirm=1) else: output = """No existing output formats to add, please create a new one.""" if fmtID not in [-1, "-1"] and confirm in [1, "1"]: output += write_outcome(ares) elif fmtID in [-1, "-1"] and confirm not in [-1, "-1"]: output += """Please select output format.""" try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showoutputformats(colID, ln, content=output) def perform_deleteoutputformat(colID, ln, fmtID=-1, callback='yes', confirm=-1): """form to delete an output format not in use. colID - the collection id of the current collection. fmtID - the format id to delete.""" subtitle = """Delete an unused output format""" output = """
Deleting an output format will also delete the translations associated.
""" colID = int(colID) if fmtID not in [-1," -1"] and confirm in [1, "1"]: fmt_dict = dict(get_def_name('', "format")) old_colNAME = fmt_dict[int(fmtID)] ares = delete_fmt(int(fmtID)) res = get_def_name('', "format") fmt_dict = dict(res) col_dict = dict(get_def_name('', "collection")) col_fmt = get_col_fmt() col_fmt = dict(map(lambda x: (x[0], x[2]), col_fmt)) if len(res) > 0: text = """ Output format
""" output += createhiddenform(action="deleteoutputformat#10.3", text=text, button="Delete", colID=colID, ln=ln, confirm=0) if fmtID not in [-1,"-1"]: fmtID = int(fmtID) if confirm in [0, "0"]: text = """Do you want to delete the output format '%s'. """ % fmt_dict[fmtID] output += createhiddenform(action="deleteoutputformat#10.3", text=text, button="Confirm", colID=colID, fmtID=fmtID, ln=ln, confirm=1) elif confirm in [1, "1"]: output += write_outcome(ares) elif confirm not in [-1, "-1"]: output += """Choose a output format to delete. """ try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showoutputformats(colID, ln, content=output) def perform_removeoutputformat(colID, ln, fmtID='', callback='yes', confirm=0): """form to remove an output format from a collection. colID - the collection id of the current collection. fmtID - the format id. """ subtitle = """Remove output format""" output = "" col_dict = dict(get_def_name('', "collection")) fmt_dict = dict(get_def_name('', "format")) if colID and fmtID: colID = int(colID) fmtID = int(fmtID) if confirm in ["0", 0]: text = """Do you want to remove the output format '%s' from the collection '%s'.""" % (fmt_dict[fmtID], col_dict[colID]) output += createhiddenform(action="removeoutputformat#10.5", text=text, button="Confirm", colID=colID, fmtID=fmtID, confirm=1) elif confirm in ["1", 1]: res = remove_fmt(colID, fmtID) output += write_outcome(res) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showoutputformats(colID, ln, content=output) def perform_index(colID=1, ln=cdslang, mtype='', content='', confirm=0): """The index method, calling methods to show the collection tree, create new collections and add collections to tree. """ subtitle = "Overview" colID = int(colID) col_dict = dict(get_def_name('', "collection")) output = "" fin_output = "" if not col_dict.has_key(1): res = add_col(cdsname, '', '') if res: fin_output += """Created root collection.
""" else: return "Cannot create root collection, please check database." if cdsname != run_sql("SELECT name from collection WHERE id=1")[0][0]: res = run_sql("update collection set name='%s' where id=1" % cdsname) if res: fin_output += """The name of the root collection has been modified to be the same as the cdsware installation name given prior to installing cdsware.
""" else: return "Error renaming root collection." fin_output += """
0. Show all 1. Create new collection 2. Attach collection to tree 3. Modify collection tree 4. Webcoll Status
5. Collections Status
""" % (weburl, colID, ln, weburl, colID, ln, weburl, colID, ln, weburl, colID, ln, weburl, colID, ln, weburl, colID, ln) if mtype == "": fin_output += """

For managing the collections, select an item from the menu.
""" if mtype == "perform_addcollection" and content: fin_output += content elif mtype == "perform_addcollection" or mtype == "perform_showall": fin_output += perform_addcollection(colID=colID, ln=ln, callback='') fin_output += "
" if mtype == "perform_addcollectiontotree" and content: fin_output += content elif mtype == "perform_addcollectiontotree" or mtype == "perform_showall": fin_output += perform_addcollectiontotree(colID=colID, ln=ln, callback='') fin_output += "
" if mtype == "perform_modifycollectiontree" and content: fin_output += content elif mtype == "perform_modifycollectiontree" or mtype == "perform_showall": fin_output += perform_modifycollectiontree(colID=colID, ln=ln, callback='') fin_output += "
" if mtype == "perform_checkwebcollstatus" and content: fin_output += content elif mtype == "perform_checkwebcollstatus" or mtype == "perform_showall": fin_output += perform_checkwebcollstatus(colID, ln, callback='') if mtype == "perform_checkcollectionstatus" and content: fin_output += content elif mtype == "perform_checkcollectionstatus" or mtype == "perform_showall": fin_output += perform_checkcollectionstatus(colID, ln, callback='') try: body = [fin_output, extra] except NameError: body = [fin_output] return addadminbox('Menu', body) def show_coll_not_in_tree(colID, ln, col_dict): """Returns collections not in tree""" tree = get_col_tree(colID) in_tree = {} output = "These collections are not in the tree, and should be added:
" for (id, up, down, dad, reltype) in tree: in_tree[id] = 1 in_tree[dad] = 1 res = run_sql("SELECT id from collection") if len(res) != len(in_tree): for id in res: if not in_tree.has_key(id[0]): output += """%s , """ % (weburl, id[0], ln, col_dict[id[0]]) output += "

" else: output = "" return output def create_colltree(tree, col_dict, colID, ln, move_from='', move_to='', rtype='', edit=''): """Creates the presentation of the collection tree, with the buttons for modifying it. tree - the tree to present, from get_tree() col_dict - the name of the collections in a dictionary colID - the collection id to start with move_from - if a collection to be moved has been chosen move_to - the collection which should be set as father of move_from rtype - the type of the tree, regular or virtual edit - if the method should output the edit buttons.""" if move_from: move_from_rtype = move_from[0] move_from_id = int(move_from[1:len(move_from)]) tree_from = get_col_tree(colID, move_from_rtype) tree_to = get_col_tree(colID, rtype) tables = 0 tstack = [] i = 0 text = """ """ for i in range(0, len(tree)): id_son = tree[i][0] up = tree[i][1] down = tree[i][2] dad = tree[i][3] reltype = tree[i][4] tmove_from = "" j = i while j > 0: j = j - 1 try: if tstack[j][1] == dad: table = tstack[j][2] for k in range(0, tables - table): tables = tables - 1 text += """
""" break except StandardError, e: pass text += """ """ if i > 0 and tree[i][1] == 0: tables = tables + 1 text += """ """ while tables > 0: text += """
""" if i == 0: tstack.append((id_son, dad, 1)) else: tstack.append((id_son, dad, tables)) if up == 1 and edit: text += """""" % (weburl, colID, ln, i, rtype, tree[i][0], weburl) else: text += """ """ text += "" if down == 1 and edit: text += """""" % (weburl, colID, ln, i, rtype, tree[i][0], weburl) else: text += """ """ text += "" if edit: if move_from and move_to: tmove_from = move_from move_from = '' if not (move_from == "" and i == 0) and not (move_from != "" and int(move_from[1:len(move_from)]) == i and rtype == move_from[0]): check = "true" if move_from: #if tree_from[move_from_id][0] == tree_to[i][0] or not check_col(tree_to[i][0], tree_from[move_from_id][0]): # check = '' #elif not check_col(tree_to[i][0], tree_from[move_from_id][0]): # check = '' #if not check and (tree_to[i][0] == 1 and tree_from[move_from_id][3] == tree_to[i][0] and move_from_rtype != rtype): # check = "true" if check: text += """ """ % (weburl, colID, ln, move_from, rtype, i, rtype, weburl, col_dict[tree_from[int(move_from[1:len(move_from)])][0]], col_dict[tree_to[i][0]]) else: try: text += """""" % (weburl, colID, ln, rtype, i, rtype, tree[i][0], weburl, col_dict[tree[i][0]]) except KeyError: pass else: text += """ """ % weburl else: text += """ """ % weburl text += """ """ if edit: try: text += """""" % (weburl, colID, ln, i, rtype, tree[i][0], weburl) except KeyError: pass elif i != 0: text += """ """ % weburl text += """ """ if tmove_from: move_from = tmove_from try: text += """%s%s%s%s%s""" % (tree[i][0], (reltype=="v" and '' or ''), weburl, tree[i][0], ln, col_dict[id_son], (move_to=="%s%s" %(rtype,i) and ' ' % weburl or ''), (move_from=="%s%s" % (rtype,i) and ' ' % weburl or ''), (reltype=="v" and '' or '')) except KeyError: pass text += """
""" tables = tables - 1 text += """ """ return text def perform_deletecollection(colID, ln, confirm=-1, callback='yes'): """form to delete a collection colID - id of collection """ subtitle ='' output = """
WARNING:
When deleting a collection, you also deletes all data related to the collection like translations, relations to other collections and information about which rank methods to use.
For more information, please go to the WebSearch guide and read the section regarding deleting a collection.
""" % weburl col_dict = dict(get_def_name('', "collection")) if colID !=1 and colID and col_dict.has_key(int(colID)): colID = int(colID) subtitle = """4. Delete collection '%s'   [?]""" % (col_dict[colID], weburl) res = run_sql("SELECT * from collection_collection WHERE id_dad=%s" % colID) res2 = run_sql("SELECT * from collection_collection WHERE id_son=%s" % colID) if not res and not res2: if confirm in ["-1", -1]: text = """Do you want to delete this collection.""" output += createhiddenform(action="deletecollection#4", text=text, colID=colID, button="Delete", confirm=0) elif confirm in ["0", 0]: text = """Are you sure you want to delete this collection.""" output += createhiddenform(action="deletecollection#4", text=text, colID=colID, button="Confirm", confirm=1) elif confirm in ["1", 1]: result = delete_col(colID) if not result: raise StandardException else: output = """Can not delete a collection that is a part of the collection tree, remove collection from the tree and try again.""" else: subtitle = """4. Delete collection""" output = """Not possible to delete the root collection""" try: body = [output, extra] except NameError: body = [output] if callback: return perform_editcollection(colID, ln, "perform_deletecollection", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_editcollection(colID=1, ln=cdslang, mtype='', content=''): """interface to modify a collection. this method is calling other methods which again is calling this and sending back the output of the method. if callback, the method will call perform_editcollection, if not, it will just return its output. colID - id of the collection mtype - the method that called this method. content - the output from that method.""" colID = int(colID) col_dict = dict(get_def_name('', "collection")) if not col_dict.has_key(colID): return """Collection deleted. """ fin_output = """
Menu
0. Show all 1. Modify collection query 2. Modify access restrictions 3. Modify translations 4. Delete collection
5. Modify portalboxes 6. Modify search fields 7. Modify search options 8. Modify sort options 9. Modify rank options
10.Modify output formats
""" % (colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln, colID, ln) if mtype == "perform_modifydbquery" and content: fin_output += content elif mtype == "perform_modifydbquery" or not mtype: fin_output += perform_modifydbquery(colID, ln, callback='') if mtype == "perform_modifyrestricted" and content: fin_output += content elif mtype == "perform_modifyrestricted" or not mtype: fin_output += perform_modifyrestricted(colID, ln, callback='') if mtype == "perform_modifytranslations" and content: fin_output += content elif mtype == "perform_modifytranslations" or not mtype: fin_output += perform_modifytranslations(colID, ln, callback='') if mtype == "perform_deletecollection" and content: fin_output += content elif mtype == "perform_deletecollection" or not mtype: fin_output += perform_deletecollection(colID, ln, callback='') if mtype == "perform_showportalboxes" and content: fin_output += content elif mtype == "perform_showportalboxes" or not mtype: fin_output += perform_showportalboxes(colID, ln, callback='') if mtype == "perform_showsearchfields" and content: fin_output += content elif mtype == "perform_showsearchfields" or not mtype: fin_output += perform_showsearchfields(colID, ln, callback='') if mtype == "perform_showsearchoptions" and content: fin_output += content elif mtype == "perform_showsearchoptions" or not mtype: fin_output += perform_showsearchoptions(colID, ln, callback='') if mtype == "perform_showsortoptions" and content: fin_output += content elif mtype == "perform_showsortoptions" or not mtype: fin_output += perform_showsortoptions(colID, ln, callback='') if mtype == "perform_modifyrankmethods" and content: fin_output += content elif mtype == "perform_modifyrankmethods" or not mtype: fin_output += perform_modifyrankmethods(colID, ln, callback='') if mtype == "perform_showoutputformats" and content: fin_output += content elif mtype == "perform_showoutputformats" or not mtype: fin_output += 
perform_showoutputformats(colID, ln, callback='') return addadminbox("Overview of edit options for collection '%s'" % col_dict[colID], [fin_output]) def perform_checkwebcollstatus(colID, ln, confirm=0, callback='yes'): """Check status of the collection tables with respect to the webcoll cache.""" subtitle = """Webcoll Status   [?]""" % weburl output = "" colID = int(colID) col_dict = dict(get_def_name('', "collection")) output += """
Last updates:
""" collection_table_update_time = "" collection_web_update_time = "" res = run_sql("SHOW TABLE STATUS LIKE 'collection'") if res: collection_table_update_time = re.sub(r'\.00$', '', str(res[0][11])) output += "Collection table last updated: %s
" % collection_table_update_time try: file = open("%s/collections/1/last-updated-ln=en.html" % cachedir) collection_web_update_time = string.strip(file.readline()) collection_web_update_time = re.sub(r'[A-Z ]+$', '', collection_web_update_time) output += "Collection webpage last updated: %s
" % collection_web_update_time file.close() except StandardError, e: pass # reformat collection_web_update_time to the format suitable for comparisons try: - collection_web_update_time = time.strftime("%04Y-%02m-%02d %02H:%02M:%02S", + collection_web_update_time = time.strftime("%Y-%m-%d %H:%M:%S", time.strptime(collection_web_update_time, "%d %b %Y %H:%M:%S")) except ValueError, e: pass if collection_table_update_time > collection_web_update_time: output += """
Warning: The collections has been modified since last time Webcoll was executed, to process the changes, Webcoll must be executed.
""" header = ['ID', 'Name', 'Time', 'Status', 'Progress'] actions = [] output += """
Last BibSched tasks:
""" res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime< now() ORDER by runtime") if len(res) > 0: (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[len(res) - 1] webcoll__update_time = runtime actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')]) else: actions.append(['', 'webcoll', '', '', 'Not executed yet']) res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime< now() ORDER by runtime") if len(res) > 0: (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[len(res) - 1] actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')]) else: actions.append(['', 'bibindex', '', '', 'Not executed yet']) output += tupletotable(header=header, tuple=actions) output += """
Next scheduled BibSched run:
""" actions = [] res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime > now() ORDER by runtime") webcoll_future = "" if len(res) > 0: (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[0] webcoll__update_time = runtime actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')]) webcoll_future = "yes" else: actions.append(['', 'webcoll', '', '', 'Not scheduled']) res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime > now() ORDER by runtime") bibindex_future = "" if len(res) > 0: (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[0] actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')]) bibindex_future = "yes" else: actions.append(['', 'bibindex', '', '', 'Not scheduled']) output += tupletotable(header=header, tuple=actions) if webcoll_future == "": output += """
Warning: Webcoll is not scheduled for a future run by bibsched, any updates to the collection will not be processed.
""" if bibindex_future == "": output += """
Warning: Bibindex is not scheduled for a future run by bibsched, any updates to the records will not be processed.
""" try: body = [output, extra] except NameError: body = [output] if callback: return perform_index(colID, ln, "perform_checkwebcollstatus", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'): """Check the configuration of the collections.""" subtitle = """Collections Status   [?]""" % weburl output = "" colID = int(colID) col_dict = dict(get_def_name('', "collection")) collections = run_sql("SELECT id, name, dbquery, restricted FROM collection ORDER BY id") header = ['ID', 'Name', 'Query', 'Subcollections', 'Restricted', 'I18N','Status'] rnk_list = get_def_name('', "rnkMETHOD") actions = [] for (id, name, dbquery, restricted) in collections: reg_sons = len(get_col_tree(id, 'r')) vir_sons = len(get_col_tree(id, 'v')) status = "" langs = run_sql("SELECT ln from collectionname where id_collection=%s" % id) i8n = "" if len(langs) > 0: for lang in langs: i8n += "%s, " % lang else: i8n = """None""" if (reg_sons > 1 and dbquery) or dbquery=="": status = """1:Query""" elif dbquery is None and reg_sons == 1: status = """2:Query""" elif dbquery == "" and reg_sons == 1: status = """3:Query""" if (reg_sons > 1 or vir_sons > 1): subs = """Yes""" else: subs = """No""" if dbquery is None: dbquery = """No""" if restricted == "": restricted = "" if status: status += """,4:Restricted""" else: status += """4:Restricted""" elif restricted is None: restricted = """No""" if status == "": status = """OK""" actions.append([id, """%s""" % (weburl, id, ln, name), dbquery, subs, restricted, i8n, status]) output += tupletotable(header=header, tuple=actions) try: body = [output, extra] except NameError: body = [output] return addadminbox(subtitle, body) if callback: return perform_index(colID, ln, "perform_checkcollectionstatus", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_removeoutputformat(colID, ln, fmtID='', callback='yes', confirm=0): """form to remove an 
output format from a collection. colID - the collection id of the current collection. fmtID - the format id. """ subtitle = """Remove output format""" output = "" col_dict = dict(get_def_name('', "collection")) fmt_dict = dict(get_def_name('', "format")) if colID and fmtID: colID = int(colID) fmtID = int(fmtID) if confirm in ["0", 0]: text = """Do you want to remove the output format '%s' from the collection '%s'.""" % (fmt_dict[fmtID], col_dict[colID]) output += createhiddenform(action="removeoutputformat#10.5", text=text, button="Confirm", colID=colID, fmtID=fmtID, confirm=1) elif confirm in ["1", 1]: res = remove_fmt(colID, fmtID) output += write_outcome(res) try: body = [output, extra] except NameError: body = [output] output = "
" + addadminbox(subtitle, body) return perform_showoutputformats(colID, ln, content=output) def get_col_tree(colID, rtype=''): """Returns a presentation of the tree as a list. TODO: Add loop detection colID - startpoint for the tree rtype - get regular or virtual part of the tree""" try: colID = int(colID) stack = [colID] ssize = 0 tree = [(colID, 0, 0, colID, 'r')] while len(stack) > 0: ccolID = stack.pop() if ccolID == colID and rtype: res = run_sql("SELECT id_son, score, type FROM collection_collection WHERE id_dad=%s AND type='%s' ORDER BY score ASC,id_son" % (ccolID,rtype)) else: res = run_sql("SELECT id_son, score, type FROM collection_collection WHERE id_dad=%s ORDER BY score ASC,id_son" % ccolID) ssize += 1 ntree = [] for i in range(0,len(res)): id_son = res[i][0] score = res[i][1] rtype = res[i][2] stack.append(id_son) if i == (len(res) - 1): up = 0 else: up = 1 if i == 0: down = 0 else: down = 1 ntree.insert(0, (id_son, up, down, ccolID, rtype)) tree = tree[0:ssize] + ntree + tree[ssize:len(tree)] return tree except StandardError, e: return () def add_col_dad_son(add_dad, add_son, rtype): """Add a son to a collection (dad) add_dad - add to this collection id add_son - add this collection id rtype - either regular or virtual""" try: res = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s ORDER BY score ASC" % add_dad) highscore = 0 for score in res: if int(score[0]) > highscore: highscore = int(score[0]) highscore += 1 res = run_sql("INSERT INTO collection_collection(id_dad,id_son,score,type) values(%s,%s,%s,'%s')" % (add_dad, add_son, highscore, rtype)) return (1, highscore) except StandardError, e: return (0, e) def compare_on_val(first, second): """Compare the two values""" return cmp(first[1], second[1]) def get_col_fld(colID=-1, type = '', id_field=''): """Returns either all portalboxes associated with a collection, or based on either colID or language or both. 
colID - collection id ln - language id""" sql = "SELECT id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue, field WHERE id_field=field.id" try: if colID > -1: sql += " AND id_collection=%s" % colID if id_field: sql += " AND id_field=%s" % id_field if type: sql += " AND type='%s'" % type sql += " ORDER BY type, score desc, score_fieldvalue desc" res = run_sql(sql) return res except StandardError, e: return "" def get_col_pbx(colID=-1, ln='', position = ''): """Returns either all portalboxes associated with a collection, or based on either colID or language or both. colID - collection id ln - language id""" sql = "SELECT id_portalbox, id_collection, ln, score, position, title, body FROM collection_portalbox, portalbox WHERE id_portalbox = portalbox.id" try: if colID > -1: sql += " AND id_collection=%s" % colID if ln: sql += " AND ln='%s'" % ln if position: sql += " AND position='%s'" % position sql += " ORDER BY position, ln, score desc" res = run_sql(sql) return res except StandardError, e: return "" def get_col_fmt(colID=-1): """Returns all formats currently associated with a collection, or for one specific collection colID - the id of the collection""" try: if colID not in [-1, "-1"]: res = run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id AND id_collection=%s ORDER BY score desc" % colID) else: res = run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id ORDER BY score desc") return res except StandardError, e: return "" def get_col_rnk(colID, ln): """ Returns a list of the rank methods the given collection is attached to colID - id from collection""" try: res1 = dict(run_sql("SELECT id_rnkMETHOD, '' FROM collection_rnkMETHOD WHERE id_collection=%s" % colID)) res2 = get_def_name('', "rnkMETHOD") result = filter(lambda x: res1.has_key(x[0]), res2) return result except StandardError, e: return () def get_pbx(): 
"""Returns all portalboxes""" try: res = run_sql("SELECT id, title, body FROM portalbox ORDER by title,body") return res except StandardError, e: return "" def get_fld_value(fldvID = ''): """Returns fieldvalue""" try: sql = "SELECT id, name, value FROM fieldvalue" if fldvID: sql += " WHERE id=%s" % fldvID sql += " ORDER BY name" res = run_sql(sql) return res except StandardError, e: return "" def get_pbx_pos(): """Returns a list of all the positions for a portalbox""" position = {} position["rt"] = "Right Top" position["lt"] = "Left Top" position["te"] = "Title Epilog" position["tp"] = "Title Prolog" position["ne"] = "Narrow by coll epilog" position["np"] = "Narrow by coll prolog" return position def get_sort_nametypes(): """Return a list of the various translationnames for the fields""" type = {} type['soo'] = 'Sort options' type['seo'] = 'Search options' type['sew'] = 'Search within' return type def get_fmt_nametypes(): """Return a list of the various translationnames for the output formats""" type = [] type.append(('ln', 'Long name')) return type def get_fld_nametypes(): """Return a list of the various translationnames for the fields""" type = [] type.append(('ln', 'Long name')) return type def get_col_nametypes(): """Return a list of the various translationnames for the collections""" type = [] type.append(('ln', 'Long name')) return type def find_last(tree, start_son): """Find the previous collection in the tree with the same father as start_son""" id_dad = tree[start_son][3] while start_son > 0: start_son -= 1 if tree[start_son][3] == id_dad: return start_son def find_next(tree, start_son): """Find the next collection in the tree with the same father as start_son""" id_dad = tree[start_son][3] while start_son < len(tree): start_son += 1 if tree[start_son][3] == id_dad: return start_son def remove_col_subcol(id_son, id_dad, type): """Remove a collection as a son of another collection in the tree, if collection isn't used elsewhere in the tree, remove all 
registered sons of the id_son. id_son - collection id of son to remove id_dad - the id of the dad""" try: if id_son != id_dad: tree = get_col_tree(id_son) res = run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s" % (id_son, id_dad)) else: tree = get_col_tree(id_son, type) res = run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s and type='%s'" % (id_son, id_dad, type)) if not run_sql("SELECT * from collection_collection WHERE id_son=%s and type='%s'" % (id_son, type)): for (id, up, down, dad, rtype) in tree: res = run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s" % (id, dad)) return (1, "") except StandardError, e: return (0, e) def check_col(add_dad, add_son): """Check if the collection can be placed as a son of the dad without causing loops. add_dad - collection id add_son - collection id""" try: stack = [add_dad] res = run_sql("SELECT id_dad FROM collection_collection WHERE id_dad=%s AND id_son=%s" % (add_dad,add_son)) if res: raise StandardError while len(stack) > 0: colID = stack.pop() res = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s" % colID) for id in res: if int(id[0]) == int(add_son): raise StandardError else: stack.append(id[0]) return (1, "") except StandardError, e: return (0, e) def attach_rnk_col(colID, rnkID): """attach rank method to collection rnkID - id from rnkMETHOD table colID - id of collection, as in collection table """ try: res = run_sql("INSERT INTO collection_rnkMETHOD(id_collection, id_rnkMETHOD) values (%s,%s)" % (colID, rnkID)) return (1, "") except StandardError, e: return (0, e) def detach_rnk_col(colID, rnkID): """detach rank method from collection rnkID - id from rnkMETHOD table colID - id of collection, as in collection table """ try: res = run_sql("DELETE FROM collection_rnkMETHOD WHERE id_collection=%s AND id_rnkMETHOD=%s" % (colID, rnkID)) return (1, "") except StandardError, e: return (0, e) def switch_col_treescore(col_1, col_2): try: 
res1 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s" % (col_1[3], col_1[0])) res2 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s" % (col_2[3], col_2[0])) res = run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s" % (res2[0][0], col_1[3], col_1[0])) res = run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s" % (res1[0][0], col_2[3], col_2[0])) return (1, "") except StandardError, e: return (0, e) def move_col_tree(col_from, col_to, move_to_rtype=''): """Move a collection from one point in the tree to another. becomes a son of the endpoint. col_from - move this collection from current point col_to - and set it as a son of this collection. move_to_rtype - either virtual or regular collection""" try: res = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s ORDER BY score asc" % col_to[0]) highscore = 0 for score in res: if int(score[0]) > highscore: highscore = int(score[0]) highscore += 1 if not move_to_rtype: move_to_rtype = col_from[4] res = run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s" % (col_from[0], col_from[3])) res = run_sql("INSERT INTO collection_collection(id_dad,id_son,score,type) values(%s,%s,%s,'%s')" % (col_to[0], col_from[0], highscore, move_to_rtype)) return (1, "") except StandardError, e: return (0, e) def remove_pbx(colID, pbxID, ln): """Removes a portalbox from the collection given. colID - the collection the format is connected to pbxID - the portalbox which should be removed from the collection. ln - the language of the portalbox to be removed""" try: res = run_sql("DELETE FROM collection_portalbox WHERE id_collection=%s AND id_portalbox=%s AND ln='%s'" % (colID, pbxID, ln)) return (1, "") except StandardError, e: return (0, e) def remove_fmt(colID,fmtID): """Removes a format from the collection given. 
colID - the collection the format is connected to fmtID - the format which should be removed from the collection.""" try: res = run_sql("DELETE FROM collection_format WHERE id_collection=%s AND id_format=%s" % (colID, fmtID)) return (1, "") except StandardError, e: return (0, e) def remove_fld(colID,fldID, fldvID=''): """Removes a field from the collection given. colID - the collection the format is connected to fldID - the field which should be removed from the collection.""" try: sql = "DELETE FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s" % (colID, fldID) if fldvID: if fldvID != "None": sql += " AND id_fieldvalue=%s" % fldvID else: sql += " AND id_fieldvalue is NULL" res = run_sql(sql) return (1, "") except StandardError, e: return (0, e) def delete_fldv(fldvID): """Deletes all data for the given fieldvalue fldvID - delete all data in the tables associated with fieldvalue and this id""" try: res = run_sql("DELETE FROM collection_field_fieldvalue WHERE id_fieldvalue=%s" % fldvID) res = run_sql("DELETE FROM fieldvalue WHERE id=%s" % fldvID) return (1, "") except StandardError, e: return (0, e) def delete_pbx(pbxID): """Deletes all data for the given portalbox pbxID - delete all data in the tables associated with portalbox and this id """ try: res = run_sql("DELETE FROM collection_portalbox WHERE id_portalbox=%s" % pbxID) res = run_sql("DELETE FROM portalbox WHERE id=%s" % pbxID) return (1, "") except StandardError, e: return (0, e) def delete_fmt(fmtID): """Deletes all data for the given format fmtID - delete all data in the tables associated with format and this id """ try: res = run_sql("DELETE FROM format WHERE id=%s" % fmtID) res = run_sql("DELETE FROM collection_format WHERE id_format=%s" % fmtID) res = run_sql("DELETE FROM formatname WHERE id_format=%s" % fmtID) return (1, "") except StandardError, e: return (0, e) def delete_col(colID): """Deletes all data for the given collection colID - delete all data in the tables associated 
with collection and this id """ try: res = run_sql("DELETE FROM collection WHERE id=%s" % colID) res = run_sql("DELETE FROM collectionname WHERE id_collection=%s" % colID) res = run_sql("DELETE FROM collection_rnkMETHOD WHERE id_collection=%s" % colID) res = run_sql("DELETE FROM collection_collection WHERE id_dad=%s" % colID) res = run_sql("DELETE FROM collection_collection WHERE id_son=%s" % colID) res = run_sql("DELETE FROM collection_portalbox WHERE id_collection=%s" % colID) res = run_sql("DELETE FROM collection_format WHERE id_collection=%s" % colID) res = run_sql("DELETE FROM collection_field_fieldvalue WHERE id_collection=%s" % colID) return (1, "") except StandardError, e: return (0, e) def add_fmt(code, name, rtype): """Add a new output format. Returns the id of the format. code - the code for the format, max 6 chars. name - the default name for the default language of the format. rtype - the default nametype""" try: res = run_sql("INSERT INTO format (code, name) values ('%s','%s')" % (MySQLdb.escape_string(code), MySQLdb.escape_string(name))) fmtID = run_sql("SELECT id FROM format WHERE code='%s'" % MySQLdb.escape_string(code)) res = run_sql("INSERT INTO formatname(id_format, type, ln, value) VALUES (%s,'%s','%s','%s')" % (fmtID[0][0], rtype, cdslang, MySQLdb.escape_string(name))) return (1, fmtID) except StandardError, e: return (0, e) def update_fldv(fldvID, name, value): """Modify existing fieldvalue fldvID - id of fieldvalue to modify value - the value of the fieldvalue name - the name of the fieldvalue.""" try: res = run_sql("UPDATE fieldvalue set name='%s' where id=%s" % (MySQLdb.escape_string(name), fldvID)) res = run_sql("UPDATE fieldvalue set value='%s' where id=%s" % (MySQLdb.escape_string(value), fldvID)) return (1, "") except StandardError, e: return (0, e) def add_fldv(name, value): """Add a new fieldvalue, returns id of fieldvalue value - the value of the fieldvalue name - the name of the fieldvalue.""" try: res = run_sql("SELECT id FROM 
fieldvalue WHERE name='%s' and value='%s'" % (MySQLdb.escape_string(name), MySQLdb.escape_string(value))) if not res: res = run_sql("INSERT INTO fieldvalue (name, value) values ('%s','%s')" % (MySQLdb.escape_string(name), MySQLdb.escape_string(value))) res = run_sql("SELECT id FROM fieldvalue WHERE name='%s' and value='%s'" % (MySQLdb.escape_string(name), MySQLdb.escape_string(value))) if res: return (1, res[0][0]) else: raise StandardError except StandardError, e: return (0, e) def add_pbx(title, body): try: res = run_sql("INSERT INTO portalbox (title, body) values ('%s','%s')" % (MySQLdb.escape_string(title), MySQLdb.escape_string(body))) res = run_sql("SELECT id FROM portalbox WHERE title='%s' AND body='%s'" % (MySQLdb.escape_string(title), MySQLdb.escape_string(body))) if res: return (1, res[0][0]) else: raise StandardError except StandardError, e: return (0, e) def add_col(colNAME, dbquery, rest): """Adds a new collection to collection table colNAME - the default name for the collection, saved to collection and collectionname dbquery - query related to the collection rest - name of apache group allowed to access collection""" try: rtype = get_col_nametypes()[0][0] colID = run_sql("SELECT id FROM collection WHERE id=1") if colID: sql = "INSERT INTO collection(name,dbquery,restricted) VALUES('%s'" % MySQLdb.escape_string(colNAME) else: sql = "INSERT INTO collection(id,name,dbquery,restricted) VALUES(1,'%s'" % MySQLdb.escape_string(colNAME) if dbquery: sql += ",'%s'" % MySQLdb.escape_string(dbquery) else: sql += ",null" if rest: sql += ",'%s'" % MySQLdb.escape_string(rest) else: sql += ",null" sql += ")" res = run_sql(sql) colID = run_sql("SELECT id FROM collection WHERE name='%s'" % MySQLdb.escape_string(colNAME)) res = run_sql("INSERT INTO collectionname(id_collection, type, ln, value) VALUES (%s,'%s','%s','%s')" % (colID[0][0], rtype, cdslang, MySQLdb.escape_string(colNAME))) if colID: return (1, colID[0][0]) else: raise StandardError except StandardError, e: 
return (0, e) def add_col_pbx(colID, pbxID, ln, position, score=''): """add a portalbox to the collection. colID - the id of the collection involved pbxID - the portalbox to add ln - which language the portalbox is for score - decides which portalbox is the most important position - position on page the portalbox should appear.""" try: if score: res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,'%s',%s,'%s')" % (pbxID, colID, ln, score, position)) else: res = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and ln='%s' and position='%s' ORDER BY score desc, ln, position" % (colID, ln, position)) if res: score = int(res[0][0]) else: score = 0 res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,'%s',%s,'%s')" % (pbxID, colID, ln, (score + 1), position)) return (1, "") except StandardError, e: return (0, e) def add_col_fmt(colID, fmtID, score=''): """Add a output format to the collection. colID - the id of the collection involved fmtID - the id of the format. score - the score of the format, decides sorting, if not given, place the format on top""" try: if score: res = run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)" % (fmtID, colID, score)) else: res = run_sql("SELECT score FROM collection_format WHERE id_collection=%s ORDER BY score desc" % colID) if res: score = int(res[0][0]) else: score = 0 res = run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)" % (fmtID, colID, (score + 1))) return (1, "") except StandardError, e: return (0, e) def add_col_fld(colID, fldID, type, fldvID=''): """Add a sort/search/field to the collection. colID - the id of the collection involved fldID - the id of the field. fldvID - the id of the fieldvalue. type - which type, seo, sew... 
score - the score of the format, decides sorting, if not given, place the format on top""" try: if fldvID and fldvID not in [-1, "-1"]: run_sql("DELETE FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type='%s' and id_fieldvalue is NULL" % (colID, fldID, type)) res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type='%s' ORDER BY score desc" % (colID, fldID, type)) if res: score = int(res[0][0]) res = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type='%s' ORDER BY score_fieldvalue desc" % (colID, fldID, type)) else: res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and type='%s' ORDER BY score desc" % (colID, type)) if res: score = int(res[0][0]) + 1 else: score = 1 res = run_sql("SELECT * FROM collection_field_fieldvalue where id_field=%s and id_collection=%s and type='%s' and id_fieldvalue=%s" % (fldID, colID, type, fldvID)) if not res: run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=score_fieldvalue+1 WHERE id_field=%s AND id_collection=%s and type='%s'" % (fldID, colID, type)) res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_fieldvalue, id_collection, type, score, score_fieldvalue) values (%s,%s,%s,'%s',%s,%s)" % (fldID, fldvID, colID, type, score, 1)) else: return (0, (1, "Already exists")) else: res = run_sql("SELECT * FROM collection_field_fieldvalue WHERE id_collection=%s AND type='%s' and id_field=%s and id_fieldvalue is NULL" % (colID, type, fldID)) if res: return (0, (1, "Already exists")) else: run_sql("UPDATE collection_field_fieldvalue SET score=score+1") res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_collection, type, score,score_fieldvalue) values (%s,%s,'%s',%s, 0)" % (fldID, colID, type, 1)) return (1, "") except StandardError, e: return (0, e) def modify_restricted(colID, rest): """Modify which apache group is 
allowed to use the collection. colID - the id of the collection involved restricted - the new group""" try: sql = "UPDATE collection SET restricted=" if rest: sql += "'%s'" % MySQLdb.escape_string(rest) else: sql += "null" sql += " WHERE id=%s" % colID res = run_sql(sql) return (1, "") except StandardError, e: return (0, e) def modify_dbquery(colID, dbquery): """Modify the dbquery of an collection. colID - the id of the collection involved dbquery - the new dbquery""" try: sql = "UPDATE collection SET dbquery=" if dbquery: sql += "'%s'" % MySQLdb.escape_string(dbquery) else: sql += "null" sql += " WHERE id=%s" % colID res = run_sql(sql) return (1, "") except StandardError, e: return (0, e) def modify_pbx(colID, pbxID, sel_ln, score='', position='', title='', body=''): """Modify a portalbox colID - the id of the collection involved pbxID - the id of the portalbox that should be modified sel_ln - the language of the portalbox that should be modified title - the title body - the content score - if several portalboxes in one position, who should appear on top. position - position on page""" try: if title: res = run_sql("UPDATE portalbox SET title='%s' WHERE id=%s" % (MySQLdb.escape_string(title), pbxID)) if body: res = run_sql("UPDATE portalbox SET body='%s' WHERE id=%s" % (MySQLdb.escape_string(body), pbxID)) if score: res = run_sql("UPDATE collection_portalbox SET score='%s' WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (score, colID, pbxID, sel_ln)) if position: res = run_sql("UPDATE collection_portalbox SET position='%s' WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (position, colID, pbxID, sel_ln)) return (1, "") except Exception, e: return (0, e) def switch_fld_score(colID, id_1, id_2): """Switch the scores of id_1 and id_2 in collection_field_fieldvalue colID - collection the id_1 or id_2 is connected to id_1/id_2 - id field from tables like format..portalbox... 
table - name of the table""" try: res1 = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s" % (colID, id_1)) res2 = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s" % (colID, id_2)) if res1[0][0] == res2[0][0]: return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem.")) else: res = run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s" % (res2[0][0], colID, id_1)) res = run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s" % (res1[0][0], colID, id_2)) return (1, "") except StandardError, e: return (0, e) def switch_fld_value_score(colID, id_1, fldvID_1, fldvID_2): """Switch the scores of two field_value colID - collection the id_1 or id_2 is connected to id_1/id_2 - id field from tables like format..portalbox... table - name of the table""" try: res1 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s" % (colID, id_1, fldvID_1)) res2 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s" % (colID, id_1, fldvID_2)) if res1[0][0] == res2[0][0]: return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem.")) else: res = run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s" % (res2[0][0], colID, id_1, fldvID_1)) res = run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s" % (res1[0][0], colID, id_1, fldvID_2)) return (1, "") except Exception, e: return (0, e) def switch_pbx_score(colID, id_1, id_2, sel_ln): """Switch the scores of id_1 and id_2 in the table given by the 
argument. colID - collection the id_1 or id_2 is connected to id_1/id_2 - id field from tables like format..portalbox... table - name of the table""" try: res1 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (colID, id_1, sel_ln)) res2 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (colID, id_2, sel_ln)) if res1[0][0] == res2[0][0]: return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem.")) res = run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (res2[0][0], colID, id_1, sel_ln)) res = run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln='%s'" % (res1[0][0], colID, id_2, sel_ln)) return (1, "") except Exception, e: return (0, e) def switch_score(colID, id_1, id_2, table): """Switch the scores of id_1 and id_2 in the table given by the argument. colID - collection the id_1 or id_2 is connected to id_1/id_2 - id field from tables like format..portalbox... 
table - name of the table""" try: res1 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%s and id_%s=%s" % (table, colID, table, id_1)) res2 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%s and id_%s=%s" % (table, colID, table, id_2)) if res1[0][0] == res2[0][0]: return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem.")) res = run_sql("UPDATE collection_%s SET score=%s WHERE id_collection=%s and id_%s=%s" % (table, res2[0][0], colID, table, id_1)) res = run_sql("UPDATE collection_%s SET score=%s WHERE id_collection=%s and id_%s=%s" % (table, res1[0][0], colID, table, id_2)) return (1, "") except Exception, e: return (0, e) diff --git a/modules/webstyle/lib/webstyle_templates.py b/modules/webstyle/lib/webstyle_templates.py index 51c4d4e7a..d7079df82 100644 --- a/modules/webstyle/lib/webstyle_templates.py +++ b/modules/webstyle/lib/webstyle_templates.py @@ -1,592 +1,592 @@ ## $Id$ ## CDSware WebStyle templates. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
import urllib import time import cgi import gettext import traceback import sre import urllib import sys from cdsware.config import * from cdsware.messages import gettext_set_language, language_list_long class Template: def tmpl_navtrailbox_body(self, weburl, ln, title, previous_links, separator, prolog, epilog): """Create navigation trail box body Parameters: - 'weburl' *string* - The base URL for the site - 'ln' *string* - The language to display - 'title' *string* - page title; - 'previous_links' *string* - the trail content from site title until current page (both ends exlusive) - 'prolog' *string* - HTML code to prefix the navtrail item with - 'epilog' *string* - HTML code to suffix the navtrail item with - 'separator' *string* - HTML code that separates two navtrail items Output: - text containing the navtrail """ # load the right message language _ = gettext_set_language(ln) out = "" if title != cdsnameintl[ln]: out += """%(msg_home)s""" % { 'weburl' : weburl, 'ln' : ln, 'msg_home' : _("Home")} if previous_links: if out: out += separator out += previous_links if title: if out: out += separator if title == cdsnameintl[ln]: # hide site name, print Home instead out += _("Home") else: out += title return prolog + out + epilog def tmpl_page(self, weburl, ln, headertitle, sitename = "", supportemail = "", description = "", keywords = "", userinfobox = "", navtrailbox = "", pageheaderadd = "", boxlefttop = "", boxlefttopadd = "", boxleftbottom = "", boxleftbottomadd = "", boxrighttop = "", boxrighttopadd = "", boxrightbottom = "", boxrightbottomadd = "", titleprologue = "", title = "", titleepilogue = "", body = "", version = "", lastupdated = None, languagebox = "", pagefooteradd = "", uid = 0, ): """Creates a complete page Parameters: - 'weburl' *string* - The base URL for the site - 'ln' *string* - The language to display - 'sitename' *string* - the first part of the page HTML title - 'headertitle' *string* - the second part of the page HTML title - 
'supportemail' *string* - email of the support team - 'description' *string* - description goes to the metadata in the header of the HTML page - 'keywords' *string* - keywords goes to the metadata in the header of the HTML page - 'userinfobox' *string* - the HTML code for the user information box - 'navtrailbox' *string* - the HTML code for the navigation trail box - 'pageheaderadd' *string* - additional page header HTML code - 'boxlefttop' *string* - left-top box HTML code - 'boxlefttopadd' *string* - additional left-top box HTML code - 'boxleftbottom' *string* - left-bottom box HTML code - 'boxleftbottomadd' *string* - additional left-bottom box HTML code - 'boxrighttop' *string* - right-top box HTML code - 'boxrighttopadd' *string* - additional right-top box HTML code - 'boxrightbottom' *string* - right-bottom box HTML code - 'boxrightbottomadd' *string* - additional right-bottom box HTML code - 'title' *string* - the title of the page - 'body' *string* - the body of the page - 'version' *string* - the version number of CDSware - 'lastupdated' *string* - when the page was last updated - 'languagebox' *string* - the HTML code for the language box - 'pagefooteradd' *string* - additional page footer HTML code Output: - HTML code of the page """ # load the right message language _ = gettext_set_language(ln) if lastupdated: msg_lastupdated = _("Last updated:") + " " + lastupdated else: msg_lastupdated = "" out = self.tmpl_pageheader( weburl = weburl, ln = ln, headertitle = headertitle, sitename = sitename, supportemail = supportemail, description = description, keywords = keywords, userinfobox = userinfobox, navtrailbox = navtrailbox, pageheaderadd = pageheaderadd, languagebox = languagebox, ) + """
%(boxlefttop)s
%(boxlefttopadd)s
%(boxleftbottomadd)s
%(boxleftbottom)s
%(boxrighttop)s
%(boxrighttopadd)s
%(boxrightbottomadd)s
%(boxrightbottom)s

%(title)s

%(body)s
""" % { 'boxlefttop' : boxlefttop, 'boxlefttopadd' : boxlefttopadd, 'boxleftbottom' : boxleftbottom, 'boxleftbottomadd' : boxleftbottomadd, 'boxrighttop' : boxrighttop, 'boxrighttopadd' : boxrighttopadd, 'boxrightbottom' : boxrightbottom, 'boxrightbottomadd' : boxrightbottomadd, 'title' : title, 'body' : body, } + self.tmpl_pagefooter( weburl = weburl, ln = ln, sitename = sitename, supportemail = supportemail, version = version, lastupdated = lastupdated, languagebox = languagebox, pagefooteradd = pagefooteradd ) return out def tmpl_pageheader(self, weburl, ln, headertitle = "", sitename = "", supportemail = "", description = "", keywords = "", userinfobox = "", navtrailbox = "", pageheaderadd = "", languagebox = "", uid = 0, ): """Creates a page header Parameters: - 'weburl' *string* - The base URL for the site - 'ln' *string* - The language to display - 'sitename' *string* - the first part of the page HTML title - 'headertitle' *string* - the second part of the page HTML title - 'supportemail' *string* - email of the support team - 'description' *string* - description goes to the metadata in the header of the HTML page - 'keywords' *string* - keywords goes to the metadata in the header of the HTML page - 'userinfobox' *string* - the HTML code for the user information box - 'navtrailbox' *string* - the HTML code for the navigation trail box - 'pageheaderadd' *string* - additional page header HTML code - 'languagebox' *string* - the HTML code for the language box Output: - HTML code of the page headers """ # load the right message language _ = gettext_set_language(ln) out = """ %(sitename)s: %(headertitle)s """ % { 'weburl' : weburl, 'ln' : ln, 'sitename' : sitename, 'headertitle' : headertitle, 'supportemail' : supportemail, 'description' : description, 'keywords' : keywords, 'userinfobox' : userinfobox, 'navtrailbox' : navtrailbox, 'pageheaderadd' : pageheaderadd, 'msg_search' : _("Search"), 'msg_submit' : _("Submit"), 'msg_personalize' : _("Personalize"), 
'msg_help' : _("Help"), 'languagebox' : languagebox, } return out def tmpl_pagefooter(self, weburl, ln, sitename = "", supportemail = "", version = "", lastupdated = None, languagebox = "", pagefooteradd = "" ): """Creates a page footer Parameters: - 'weburl' *string* - The base URL for the site - 'ln' *string* - The language to display - 'sitename' *string* - the first part of the page HTML title - 'supportemail' *string* - email of the support team - 'version' *string* - the version number of CDSware - 'lastupdated' *string* - when the page was last updated - 'languagebox' *string* - the HTML code for the language box - 'pagefooteradd' *string* - additional page footer HTML code Output: - HTML code of the page headers """ # load the right message language _ = gettext_set_language(ln) if lastupdated: msg_lastupdated = _("Last updated:") + " " + lastupdated else: msg_lastupdated = "" out = """ """ % { 'weburl' : weburl, 'ln' : ln, 'sitename' : sitename, 'supportemail' : supportemail, 'msg_search' : _("Search"), 'msg_submit' : _("Submit"), 'msg_personalize' : _("Personalize"), 'msg_help' : _("Help"), 'msg_poweredby' : _("Powered by"), 'msg_maintainedby' : _("Maintained by"), 'msg_lastupdated' : msg_lastupdated, 'languagebox' : languagebox, 'version' : version, 'pagefooteradd' : pagefooteradd, } return out def tmpl_language_selection_box(self, urlargs="", language="en"): """Take URLARGS and LANGUAGE and return textual language selection box for the given page. 
Parameters: - 'urlargs' *string* - The url args that helped produce this page - 'language' *string* - The selected language """ # load the right message language _ = gettext_set_language(language) out = "" for (lang, lang_namelong) in language_list_long(): if lang == language: out += """ %s   """ % lang_namelong else: if urlargs: urlargs = sre.sub(r'ln=.*?(&|$)', '', urlargs) if urlargs: if urlargs.endswith('&'): urlargs += "ln=%s" % lang else: urlargs += "&ln=%s" % lang else: urlargs = "ln=%s" % lang out += """ %s   """ % (urlargs, lang_namelong) return _("This site is also available in the following languages:") + "
" + out def tmpl_error_box(self, ln, title, verbose, req, supportemail, errors): """Produces an error box. Parameters: - 'title' *string* - The title of the error box - 'ln' *string* - The selected language - 'verbose' *bool* - If lots of information should be displayed - 'req' *object* - the request object - 'supportemail' *string* - the supportemail for this installation - 'errors' list of tuples (error_code, error_message) - #! todo """ # load the right message language _ = gettext_set_language(ln) info_not_available = _("N/A") if title == None: if errors: title = _("Error: %s") % errors[0][1] else: title = _("Internal Error") browser_s = _("Browser") if req: try: if req.headers_in.has_key('User-Agent'): browser_s += ': ' + req.headers_in['User-Agent'] else: browser_s += ': ' + info_not_available host_s = req.hostname page_s = req.unparsed_uri client_s = req.connection.remote_ip except: pass else: browser_s += ': ' + info_not_available host_s = page_s = client_s = info_not_available error_s = '' sys_error_s = '' traceback_s = '' if verbose >= 1: if sys.exc_info()[0]: sys_error_s = _("System Error: %s %s\n") % (sys.exc_info()[0], sys.exc_info()[1]) if errors: errs = '' for error_tuple in errors: try: errs += "%s%s : %s\n " % (' '*6, error_tuple[0], error_tuple[1]) except: errs += "%s%s\n" % (' '*6, error_tuple) errs = errs[6:-2] # get rid of trainling ',' error_s = _("Error: %s")% errs + "\n" else: error_s = _("Error") + ': ' + info_not_available if verbose >= 9: traceback_s = _("Traceback: \n%s") % string.join(traceback.format_tb(sys.exc_info()[2]),"\n") out = """

%(title)s %(sys1)s %(sys2)s

%(contact)s

 URI: http://%(host)s%(page)s
 %(time_label)s: %(time)s
 %(browser)s
 %(client_label)s: %(client)s
 %(error)s%(sys_error)s%(traceback)s
 
%(send_error_label)s
""" % { 'title' : title, 'time_label': _("Time"), 'client_label': _("Client"), 'send_error_label': _("Please send an error report to the Administrator"), 'send_label': _("Send error report"), 'sys1' : sys.exc_info()[0] or '', 'sys2' : sys.exc_info()[1] or '', 'contact' : _("Please contact %s quoting the following information:") % (urllib.quote(supportemail), supportemail), 'host' : host_s, 'page' : page_s, - 'time' : time.strftime("%02d/%b/%Y:%H:%M:%S %z"), + 'time' : time.strftime("%d/%b/%Y:%H:%M:%S %z"), 'browser' : browser_s, 'client' : client_s, 'error' : error_s, 'traceback' : traceback_s, 'sys_error' : sys_error_s, 'weburl' : weburl, 'referer' : page_s!=info_not_available and ("http://" + host_s + page_s) or info_not_available } return out