diff --git a/modules/webalert/lib/alert_engine.py b/modules/webalert/lib/alert_engine.py index 9d37f1521..47621032d 100644 --- a/modules/webalert/lib/alert_engine.py +++ b/modules/webalert/lib/alert_engine.py @@ -1,432 +1,433 @@ ## $Id$ ## Alert engine implementation. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ##read config variables #include "config.wml" #include "configbis.wml" ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """Alert engine implementation.""" ## rest of the Python code goes below __version__ = "$Id$" try: from cgi import parse_qs from sre import search, sub from time import localtime, strftime, mktime, sleep import smtplib from config import * from search_engine import perform_request_search from dbquery import run_sql from htmlparser import * from string import split except ImportError, e: print "Error: %s" % e import sys sys.exit(1) MAXIDS = 50 FROMADDR = 'CDS Alert Engine <%s>' % alertengineemail ALERTURL = weburl + '/youralerts.py/list' DEVELOPERADDR = ['erik.simon@cern.ch', 'tibor.simko@cern.ch'] # Debug levels: # 0 = production, nothing on the console, email sent # 1 = messages on the console, email sent # 2 = messages on the console, but no email sent # 3 = many messages on the console, no email sent # 4 = many messages on the console, email sent to DEVELOPERADDR DEBUGLEVEL = 4 def update_date_lastrun(alert): return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],)) def get_alert_queries(frequency): return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,)) def get_alert_queries_for_user(uid): return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,)) def get_alerts(query, frequency): r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,)) return {'alerts': r, 'records': query['records'], 'argstr': query['argstr'], 'date_from': query['date_from'], 'date_until': query['date_until']} # def add_record_to_basket(record_id, basket_id): # if DEBUGLEVEL > 0: # print "-> adding record %s into basket %s" % (record_id, basket_id) # try: # return run_sql('insert into basket_record (id_basket, id_record) values(%s, %s);', (basket_id, record_id,)) # except: # return 0 # def add_records_to_basket(record_ids, basket_id): # # TBD: generate the list and all all records in one step (see below) # for i in record_ids: # add_record_to_basket(i, basket_id) # Optimized version: def add_records_to_basket(record_ids, basket_id): global DEBUGLEVEL nrec = len(record_ids) if nrec > 0: vals = '(%s,%s)' % (basket_id, record_ids[0]) if nrec > 1: for i in record_ids[1:]: vals += ',(%s, %s)' % (basket_id, i) if DEBUGLEVEL > 0: print "-> adding %s records into basket %s: %s" % (nrec, basket_id, vals) try: if DEBUGLEVEL < 4: return run_sql('insert into basket_record (id_basket, id_record) values %s;' % vals) # Cannot use the run_sql(, (,)) form for some reason else: print ' NOT ADDED, DEBUG LEVEL == 4' return 0 except: return 0 else: return 0 def get_email(uid): r = run_sql('select email from user where id=%s', (uid,)) return r[0][0] def get_query(alert_id): r = run_sql('select urlargs from query where id=%s', (alert_id,)) return r[0][0] def send_email(fromaddr, toaddr, body): global DEBUGLEVEL try: server = smtplib.SMTP('localhost') if DEBUGLEVEL > 2: server.set_debuglevel(1) else: server.set_debuglevel(0) server.sendmail(fromaddr, toaddr, body) server.quit() except: print 'Error connecting to SMTP server, retrying in 10 seconds.' sleep(10) send_email(fromaddr, toaddr, body) def forge_email(fromaddr, toaddr, subject, content): body = 'From: %s\nTo: %s\nContent-Type: text/plain; charset=utf-8\nSubject: %s\n%s' % (fromaddr, toaddr, subject, content) return body def format_frequency(freq): frequency = freq if frequency == "day": return 'daily' else: return frequency + 'ly' def print_records(record_ids): global MAXIDS msg = '' c = 1 for i in record_ids: if c > MAXIDS: break msg += '\n\n%s) %s' % (c, get_as_text(i)) c += 1 if c > MAXIDS: msg += '\n\n' + wrap('Only the first %s records are displayed above. Please consult the URL below to see all the results.' % MAXIDS) return msg def email_notify(alert, records, argstr): global FROMADDR global ALERTURL global DEBUGLEVEL global DEVELOPERADDR if len(records) == 0: return msg = "" if DEBUGLEVEL > 0: msg = "*** THIS MESSAGE WAS SENT IN DEBUG MODE, DON'T TAKE IT INTO ACCOUNT ***\n\n" msg += "Hello\n\n" msg += wrap("Below are the results of the email alert that you set up with the CERN Document Server. This is an automatic message, please don't reply to its address. For any question, use <%s> instead." % supportemail) email = get_email(alert[0]) url = weburl + "/search.py?" + argstr pattern = get_pattern(argstr) catalogue = get_catalogue(argstr) catword = 'catalogue' if get_catalogue_num(argstr) > 1: catword += 's' time = strftime("%d-%m-%Y") msg += '\n' + wrap('alert name: %s' % alert[5]) msg += wrap('pattern: \'%s\'' % pattern) if catalogue: msg += wrap('%s: %s' % (catword, catalogue)) msg += wrap('frequency: %s ' % format_frequency(alert[3])) msg += wrap('run time: %s ' % time) - msg += wrap('found: %s record' % len(records)) + recword = 'record' if len(records) > 1: - msg += 's' + recword += 's' + msg += wrap('found: %s %s' % (len(records), recword)) msg += "\nurl: <%s/search.py?%s>\n" % (weburl, argstr) msg += wrap_records(print_records(records)) msg += "-- \nCERN Document Server Alert Service <%s>\nUnsubscribe at <%s>\nNeed human intervention? Contact <%s>" % (weburl, ALERTURL, supportemail) subject = 'Alert %s run on %s' % (alert[5], time) body = forge_email(FROMADDR, email, subject, msg) if DEBUGLEVEL > 0: print "********************************************************************************" print body print "********************************************************************************" if DEBUGLEVEL < 2: send_email(FROMADDR, email, body) if DEBUGLEVEL == 4: for a in DEVELOPERADDR: send_email(FROMADDR, a, body) def get_argument(args, argname): if args.has_key(argname): return args[argname] else: return [] def get_record_ids(argstr, date_from, date_until): args = parse_qs(argstr) p = get_argument(args, 'p') c = get_argument(args, 'c') cc = get_argument(args, 'cc') as = get_argument(args, 'as') f = get_argument(args, 'f') rg = get_argument(args, 'rg') so = get_argument(args, 'so') sp = get_argument(args, 'sp') ot = get_argument(args, 'ot') as = get_argument(args, 'as') p1 = get_argument(args, 'p1') f1 = get_argument(args, 'f1') m1 = get_argument(args, 'm1') op1 = get_argument(args, 'op1') p2 = get_argument(args, 'p2') f2 = get_argument(args, 'f2') m2 = get_argument(args, 'm2') op2 = get_argument(args, 'op2') p3 = get_argument(args, 'p3') f3 = get_argument(args, 'f3') m3 = get_argument(args, 'm3') sc = get_argument(args, 'sc') # search = get_argument(args, 'search') d1y, d1m, d1d = date_from d2y, d2m, d2d = date_until return perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot, as=as, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y, d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d) def get_argument_as_string(argstr, argname): args = parse_qs(argstr) a = get_argument(args, argname) r = '' if len(a): r = a[0] for i in a[1:len(a)]: r += ", %s" % i return r def get_pattern(argstr): return get_argument_as_string(argstr, 'p') def get_catalogue(argstr): return get_argument_as_string(argstr, 'c') def get_catalogue_num(argstr): args = parse_qs(argstr) a = get_argument(args, 'c') return len(a) def get_date_from(time, freq): t = mktime(time) if freq == 'day': time2 = localtime(t - 86400) elif freq == 'month': m = time[1] - 1 y = time[0] if m == 0: m = 12 y -= 1 time2 = (y, m, time[2], time[3], time[4], time[5], time[6], time[7], time[8]) elif freq == 'week': time2 = localtime(t - 604800) ystr = strftime("%Y", time2) mstr = strftime("%m", time2) dstr = strftime("%d", time2) return (ystr, mstr, dstr) def run_query(query, frequency): """Return a dictionary containing the information of the performed query. The information contains the id of the query, the arguments as a string, and the list of found records.""" time = localtime() # Override time here for testing purposes (beware of localtime offset): #time = (2002, 12, 21, 2, 0, 0, 2, 120, 1) # Override frequency here for testing #frequency = 'week' ystr = strftime("%Y", time) mstr = strftime("%m", time) dstr = strftime("%d", time) date_until = (ystr, mstr, dstr) date_from = get_date_from(time, frequency) recs = get_record_ids(query[1], date_from, date_until) if DEBUGLEVEL > 2: print "[%s] run query: %s with dates: from=%s, until=%s\n found rec ids: %s" % (strftime("%c"), query, date_from, date_until, recs) return {'id_query': query[0], 'argstr': query[1], 'records': recs, 'date_from': date_from, 'date_until': date_until} def process_alert_queries(frequency): """Run the alerts according to the frequency. Retrieves the queries for which an alert exists, performs it, and processes the corresponding alerts.""" alert_queries = get_alert_queries(frequency) for aq in alert_queries: q = run_query(aq, frequency) alerts = get_alerts(q, frequency) process_alerts(alerts) def replace_argument(argstr, argname, argval): """Replace the given date argument value with the new one. If the argument is missing, it is added.""" if search('%s=\d+' % argname, argstr): r = sub('%s=\d+' % argname, '%s=%s' % (argname, argval), argstr) else: r = argstr + '&%s=%s' % (argname, argval) return r def update_arguments(argstr, date_from, date_until): """Replace date arguments in argstr with the ones specified by date_from and date_until. Absent arguments are added.""" d1y, d1m, d1d = date_from d2y, d2m, d2d = date_until r = replace_argument(argstr, 'd1y', d1y) r = replace_argument(r, 'd1m', d1m) r = replace_argument(r, 'd1d', d1d) r = replace_argument(r, 'd2y', d2y) r = replace_argument(r, 'd2m', d2m) r = replace_argument(r, 'd2d', d2d) return r def process_alerts(alerts): # TBD: do not generate the email each time, forge it once and then # send it to all appropriate people for a in alerts['alerts']: if alert_use_basket_p(a): add_records_to_basket(alerts['records'], a[2]) if alert_use_notification_p(a): argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until']) email_notify(a, alerts['records'], argstr) update_date_lastrun(a) def alert_use_basket_p(alert): return alert[2] != 0 def alert_use_notification_p(alert): return alert[6] == 'y' def run_alerts(): """Run the alerts. First decide which alerts to run according to the current local time, and runs them.""" t = localtime() if t[2] == 1: # first of the month process_alert_queries('month') t = strftime("%A") if t == 'Monday': # first day of the week process_alert_queries('week') process_alert_queries('day') def process_alert_queries_for_user(uid): """Process the alerts for the given user id. All alerts are with reference date set as the current local time.""" alert_queries = get_alert_queries_for_user(uid) print alert_queries for aq in alert_queries: frequency = aq[2] q = run_query(aq, frequency) alerts = get_alerts(q, frequency) process_alerts(alerts) if __name__ == '__main__': process_alert_queries_for_user(2571422) # erik process_alert_queries_for_user(109) # tibor # process_alert_queries_for_user(11040) # jean-yves diff --git a/modules/webalert/lib/alert_engine.py.wml b/modules/webalert/lib/alert_engine.py.wml index 47621032d..0686372ff 100644 --- a/modules/webalert/lib/alert_engine.py.wml +++ b/modules/webalert/lib/alert_engine.py.wml @@ -1,433 +1,433 @@ ## $Id$ ## Alert engine implementation. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ##read config variables #include "config.wml" #include "configbis.wml" ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """Alert engine implementation.""" ## rest of the Python code goes below __version__ = "$Id$" try: from cgi import parse_qs from sre import search, sub from time import localtime, strftime, mktime, sleep import smtplib from config import * from search_engine import perform_request_search from dbquery import run_sql from htmlparser import * from string import split except ImportError, e: print "Error: %s" % e import sys sys.exit(1) MAXIDS = 50 FROMADDR = 'CDS Alert Engine <%s>' % alertengineemail ALERTURL = weburl + '/youralerts.py/list' DEVELOPERADDR = ['erik.simon@cern.ch', 'tibor.simko@cern.ch'] # Debug levels: # 0 = production, nothing on the console, email sent # 1 = messages on the console, email sent # 2 = messages on the console, but no email sent # 3 = many messages on the console, no email sent # 4 = many messages on the console, email sent to DEVELOPERADDR DEBUGLEVEL = 4 def update_date_lastrun(alert): return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;', (strftime("%Y-%m-%d"), alert[0], alert[1], alert[2],)) def get_alert_queries(frequency): return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,)) def get_alert_queries_for_user(uid): return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,)) def get_alerts(query, frequency): r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,)) return {'alerts': r, 'records': query['records'], 'argstr': query['argstr'], 'date_from': query['date_from'], 'date_until': query['date_until']} # def add_record_to_basket(record_id, basket_id): # if DEBUGLEVEL > 0: # print "-> adding record %s into basket %s" % (record_id, basket_id) # try: # return run_sql('insert into basket_record (id_basket, id_record) values(%s, %s);', (basket_id, record_id,)) # except: # return 0 # def add_records_to_basket(record_ids, basket_id): # # TBD: generate the list and all all records in one step (see below) # for i in record_ids: # add_record_to_basket(i, basket_id) # Optimized version: def add_records_to_basket(record_ids, basket_id): global DEBUGLEVEL nrec = len(record_ids) if nrec > 0: vals = '(%s,%s)' % (basket_id, record_ids[0]) if nrec > 1: for i in record_ids[1:]: vals += ',(%s, %s)' % (basket_id, i) if DEBUGLEVEL > 0: print "-> adding %s records into basket %s: %s" % (nrec, basket_id, vals) try: if DEBUGLEVEL < 4: return run_sql('insert into basket_record (id_basket, id_record) values %s;' % vals) # Cannot use the run_sql(, (,)) form for some reason else: print ' NOT ADDED, DEBUG LEVEL == 4' return 0 except: return 0 else: return 0 def get_email(uid): r = run_sql('select email from user where id=%s', (uid,)) return r[0][0] def get_query(alert_id): r = run_sql('select urlargs from query where id=%s', (alert_id,)) return r[0][0] def send_email(fromaddr, toaddr, body): global DEBUGLEVEL try: server = smtplib.SMTP('localhost') if DEBUGLEVEL > 2: server.set_debuglevel(1) else: server.set_debuglevel(0) server.sendmail(fromaddr, toaddr, body) server.quit() except: print 'Error connecting to SMTP server, retrying in 10 seconds.' sleep(10) send_email(fromaddr, toaddr, body) def forge_email(fromaddr, toaddr, subject, content): body = 'From: %s\nTo: %s\nContent-Type: text/plain; charset=utf-8\nSubject: %s\n%s' % (fromaddr, toaddr, subject, content) return body def format_frequency(freq): frequency = freq if frequency == "day": return 'daily' else: return frequency + 'ly' def print_records(record_ids): global MAXIDS msg = '' c = 1 for i in record_ids: if c > MAXIDS: break msg += '\n\n%s) %s' % (c, get_as_text(i)) c += 1 if c > MAXIDS: msg += '\n\n' + wrap('Only the first %s records are displayed above. Please consult the URL below to see all the results.' % MAXIDS) return msg def email_notify(alert, records, argstr): global FROMADDR global ALERTURL global DEBUGLEVEL global DEVELOPERADDR if len(records) == 0: return msg = "" if DEBUGLEVEL > 0: msg = "*** THIS MESSAGE WAS SENT IN DEBUG MODE, DON'T TAKE IT INTO ACCOUNT ***\n\n" msg += "Hello\n\n" msg += wrap("Below are the results of the email alert that you set up with the CERN Document Server. This is an automatic message, please don't reply to its address. For any question, use <%s> instead." % supportemail) email = get_email(alert[0]) url = weburl + "/search.py?" + argstr pattern = get_pattern(argstr) catalogue = get_catalogue(argstr) catword = 'catalogue' if get_catalogue_num(argstr) > 1: catword += 's' time = strftime("%d-%m-%Y") msg += '\n' + wrap('alert name: %s' % alert[5]) msg += wrap('pattern: \'%s\'' % pattern) if catalogue: msg += wrap('%s: %s' % (catword, catalogue)) msg += wrap('frequency: %s ' % format_frequency(alert[3])) msg += wrap('run time: %s ' % time) recword = 'record' if len(records) > 1: recword += 's' msg += wrap('found: %s %s' % (len(records), recword)) - msg += "\nurl: <%s/search.py?%s>\n" % (weburl, argstr) + msg += "url: <%s/search.py?%s>\n" % (weburl, argstr) msg += wrap_records(print_records(records)) msg += "-- \nCERN Document Server Alert Service <%s>\nUnsubscribe at <%s>\nNeed human intervention? Contact <%s>" % (weburl, ALERTURL, supportemail) subject = 'Alert %s run on %s' % (alert[5], time) body = forge_email(FROMADDR, email, subject, msg) if DEBUGLEVEL > 0: print "********************************************************************************" print body print "********************************************************************************" if DEBUGLEVEL < 2: send_email(FROMADDR, email, body) if DEBUGLEVEL == 4: for a in DEVELOPERADDR: send_email(FROMADDR, a, body) def get_argument(args, argname): if args.has_key(argname): return args[argname] else: return [] def get_record_ids(argstr, date_from, date_until): args = parse_qs(argstr) p = get_argument(args, 'p') c = get_argument(args, 'c') cc = get_argument(args, 'cc') as = get_argument(args, 'as') f = get_argument(args, 'f') rg = get_argument(args, 'rg') so = get_argument(args, 'so') sp = get_argument(args, 'sp') ot = get_argument(args, 'ot') as = get_argument(args, 'as') p1 = get_argument(args, 'p1') f1 = get_argument(args, 'f1') m1 = get_argument(args, 'm1') op1 = get_argument(args, 'op1') p2 = get_argument(args, 'p2') f2 = get_argument(args, 'f2') m2 = get_argument(args, 'm2') op2 = get_argument(args, 'op2') p3 = get_argument(args, 'p3') f3 = get_argument(args, 'f3') m3 = get_argument(args, 'm3') sc = get_argument(args, 'sc') # search = get_argument(args, 'search') d1y, d1m, d1d = date_from d2y, d2m, d2d = date_until return perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot, as=as, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y, d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d) def get_argument_as_string(argstr, argname): args = parse_qs(argstr) a = get_argument(args, argname) r = '' if len(a): r = a[0] for i in a[1:len(a)]: r += ", %s" % i return r def get_pattern(argstr): return get_argument_as_string(argstr, 'p') def get_catalogue(argstr): return get_argument_as_string(argstr, 'c') def get_catalogue_num(argstr): args = parse_qs(argstr) a = get_argument(args, 'c') return len(a) def get_date_from(time, freq): t = mktime(time) if freq == 'day': time2 = localtime(t - 86400) elif freq == 'month': m = time[1] - 1 y = time[0] if m == 0: m = 12 y -= 1 time2 = (y, m, time[2], time[3], time[4], time[5], time[6], time[7], time[8]) elif freq == 'week': time2 = localtime(t - 604800) ystr = strftime("%Y", time2) mstr = strftime("%m", time2) dstr = strftime("%d", time2) return (ystr, mstr, dstr) def run_query(query, frequency): """Return a dictionary containing the information of the performed query. The information contains the id of the query, the arguments as a string, and the list of found records.""" time = localtime() # Override time here for testing purposes (beware of localtime offset): #time = (2002, 12, 21, 2, 0, 0, 2, 120, 1) # Override frequency here for testing #frequency = 'week' ystr = strftime("%Y", time) mstr = strftime("%m", time) dstr = strftime("%d", time) date_until = (ystr, mstr, dstr) date_from = get_date_from(time, frequency) recs = get_record_ids(query[1], date_from, date_until) if DEBUGLEVEL > 2: print "[%s] run query: %s with dates: from=%s, until=%s\n found rec ids: %s" % (strftime("%c"), query, date_from, date_until, recs) return {'id_query': query[0], 'argstr': query[1], 'records': recs, 'date_from': date_from, 'date_until': date_until} def process_alert_queries(frequency): """Run the alerts according to the frequency. Retrieves the queries for which an alert exists, performs it, and processes the corresponding alerts.""" alert_queries = get_alert_queries(frequency) for aq in alert_queries: q = run_query(aq, frequency) alerts = get_alerts(q, frequency) process_alerts(alerts) def replace_argument(argstr, argname, argval): """Replace the given date argument value with the new one. If the argument is missing, it is added.""" if search('%s=\d+' % argname, argstr): r = sub('%s=\d+' % argname, '%s=%s' % (argname, argval), argstr) else: r = argstr + '&%s=%s' % (argname, argval) return r def update_arguments(argstr, date_from, date_until): """Replace date arguments in argstr with the ones specified by date_from and date_until. Absent arguments are added.""" d1y, d1m, d1d = date_from d2y, d2m, d2d = date_until r = replace_argument(argstr, 'd1y', d1y) r = replace_argument(r, 'd1m', d1m) r = replace_argument(r, 'd1d', d1d) r = replace_argument(r, 'd2y', d2y) r = replace_argument(r, 'd2m', d2m) r = replace_argument(r, 'd2d', d2d) return r def process_alerts(alerts): # TBD: do not generate the email each time, forge it once and then # send it to all appropriate people for a in alerts['alerts']: if alert_use_basket_p(a): add_records_to_basket(alerts['records'], a[2]) if alert_use_notification_p(a): argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until']) email_notify(a, alerts['records'], argstr) update_date_lastrun(a) def alert_use_basket_p(alert): return alert[2] != 0 def alert_use_notification_p(alert): return alert[6] == 'y' def run_alerts(): """Run the alerts. First decide which alerts to run according to the current local time, and runs them.""" t = localtime() if t[2] == 1: # first of the month process_alert_queries('month') t = strftime("%A") if t == 'Monday': # first day of the week process_alert_queries('week') process_alert_queries('day') def process_alert_queries_for_user(uid): """Process the alerts for the given user id. All alerts are with reference date set as the current local time.""" alert_queries = get_alert_queries_for_user(uid) print alert_queries for aq in alert_queries: frequency = aq[2] q = run_query(aq, frequency) alerts = get_alerts(q, frequency) process_alerts(alerts) if __name__ == '__main__': process_alert_queries_for_user(2571422) # erik process_alert_queries_for_user(109) # tibor # process_alert_queries_for_user(11040) # jean-yves diff --git a/modules/webalert/lib/htmlparser.py b/modules/webalert/lib/htmlparser.py index 9d6325a91..a093e112d 100644 --- a/modules/webalert/lib/htmlparser.py +++ b/modules/webalert/lib/htmlparser.py @@ -1,136 +1,136 @@ ## $Id$ ## HTML parser for records. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ##read config variables #include "config.wml" #include "configbis.wml" ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """HTML parser for records.""" ## rest of the Python code goes below __version__ = "$Id$" try: from config import * from search_engine import print_record from HTMLParser import HTMLParser import textwrap from string import split except ImportError, e: print "Error: %s" % e import sys sys.exit(1) WRAPWIDTH = 72 def wrap(text): global WRAPWIDTH lines = textwrap.wrap(text, WRAPWIDTH) r = '' for l in lines: r += l + '\n' return r def wrap_records(text): global WRAPWIDTH lines = split(text, '\n') result = '' for l in lines: newlines = textwrap.wrap(l, WRAPWIDTH) for ll in newlines: result += ll + '\n' return result class RecordHTMLParser(HTMLParser): """A parser for the HTML returned by cdsware.search_engine.print_record. The parser provides methods to transform the HTML returned by cdsware.search_engine.print_record into plain text, with some minor formatting. """ def __init__(self): HTMLParser.__init__(self) self.result = '' def handle_starttag(self, tag, attrs): if tag == 'strong': # self.result += '*' pass elif tag == 'a': self.printURL = 0 self.unclosedBracket = 0 for f in attrs: if f[1] == 'note': self.result += 'Fulltext : <' self.unclosedBracket = 1 if f[1] == 'moreinfo': self.result += 'Detailed record : ' self.printURL = 1 if (self.printURL == 1) and (f[0] == 'href'): self.result += '<' + f[1] + '>' elif tag == 'br': self.result += '\n' def handle_endtag(self, tag): if tag == 'strong': # self.result += '\n' pass elif tag == 'a': if self.unclosedBracket == 1: self.result += '>' self.unclosedBracket = 0 def handle_data(self, data): if data == 'Detailed record': pass else: self.result += data def handle_comment(self, data): pass def get_as_text(record_id): """Return the plain text from RecordHTMLParser of the record.""" rec = print_record(record_id) htparser = RecordHTMLParser() try: htparser.feed(rec) return htparser.result except: - htparser.close() - return htparser.result + '\n**HTML Error detected in record , contact <%s>.' % (record_id, supportemail) + #htparser.close() + return wrap(htparser.result + '\n**HTML Error detected in record , contact <%s>.' % (record_id, supportemail)) if __name__ == "__main__": rec = print_record(619028) print rec print "***" print get_as_text(619028) diff --git a/modules/webalert/lib/htmlparser.py.wml b/modules/webalert/lib/htmlparser.py.wml index 9d6325a91..a093e112d 100644 --- a/modules/webalert/lib/htmlparser.py.wml +++ b/modules/webalert/lib/htmlparser.py.wml @@ -1,136 +1,136 @@ ## $Id$ ## HTML parser for records. ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ##read config variables #include "config.wml" #include "configbis.wml" ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. """HTML parser for records.""" ## rest of the Python code goes below __version__ = "$Id$" try: from config import * from search_engine import print_record from HTMLParser import HTMLParser import textwrap from string import split except ImportError, e: print "Error: %s" % e import sys sys.exit(1) WRAPWIDTH = 72 def wrap(text): global WRAPWIDTH lines = textwrap.wrap(text, WRAPWIDTH) r = '' for l in lines: r += l + '\n' return r def wrap_records(text): global WRAPWIDTH lines = split(text, '\n') result = '' for l in lines: newlines = textwrap.wrap(l, WRAPWIDTH) for ll in newlines: result += ll + '\n' return result class RecordHTMLParser(HTMLParser): """A parser for the HTML returned by cdsware.search_engine.print_record. The parser provides methods to transform the HTML returned by cdsware.search_engine.print_record into plain text, with some minor formatting. """ def __init__(self): HTMLParser.__init__(self) self.result = '' def handle_starttag(self, tag, attrs): if tag == 'strong': # self.result += '*' pass elif tag == 'a': self.printURL = 0 self.unclosedBracket = 0 for f in attrs: if f[1] == 'note': self.result += 'Fulltext : <' self.unclosedBracket = 1 if f[1] == 'moreinfo': self.result += 'Detailed record : ' self.printURL = 1 if (self.printURL == 1) and (f[0] == 'href'): self.result += '<' + f[1] + '>' elif tag == 'br': self.result += '\n' def handle_endtag(self, tag): if tag == 'strong': # self.result += '\n' pass elif tag == 'a': if self.unclosedBracket == 1: self.result += '>' self.unclosedBracket = 0 def handle_data(self, data): if data == 'Detailed record': pass else: self.result += data def handle_comment(self, data): pass def get_as_text(record_id): """Return the plain text from RecordHTMLParser of the record.""" rec = print_record(record_id) htparser = RecordHTMLParser() try: htparser.feed(rec) return htparser.result except: - htparser.close() - return htparser.result + '\n**HTML Error detected in record , contact <%s>.' % (record_id, supportemail) + #htparser.close() + return wrap(htparser.result + '\n**HTML Error detected in record , contact <%s>.' % (record_id, supportemail)) if __name__ == "__main__": rec = print_record(619028) print rec print "***" print get_as_text(619028)