Page MenuHomec4science

sessiongc.wml
No OneTemporary

File Metadata

Created
Fri, Jul 12, 06:49

sessiongc.wml

## $Id$
## BibWords bibliographic data, reference and fulltext indexing utility.
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## read config variables:
#include "config.wml"
#include "configbis.wml"
#include "cdswmllib.wml"
## start Python:
<protect>#!</protect><PYTHON>
<protect># -*- coding: utf-8 -*-</protect>
<protect>## $Id$</protect>
<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
"""
Guest user sessions garbage collector. To be run via cron once per day. (say)
"""
__version__ = "<: print generate_pretty_version_string('$Id$'); :>"
## fill config variables:
pylibdir = "<LIBDIR>/python"
## okay, rest of the Python code goes below
#######
import sys
try:
sys.path.append('%s' % pylibdir)
from cdsware.dbquery import run_sql
import getopt
import time
except ImportError, e:
print "Error: %s" % (e, )
sys.exit(1)
# configure variables
cfg_mysql_argumentlist_size = 100
def guest_user_garbage_collector(verbose=1):
"""Session Garbage Collector
program flow/tasks:
1: delete expired sessions
1b:delete guest users without session
2: delete queries not attached to any user
3: delete baskets not attached to any user
4: delete alerts not attached to any user
verbose - level of program output.
0 - nothing
1 - default
9 - max, debug"""
# dictionary used to keep track of number of deleted entries
delcount = {'session': 0,
'user': 0,
'user_query': 0,
'query': 0,
'basket': 0,
'user_basket': 0,
'user_query_basket': 0}
if verbose: print """\nGUEST USER SESSIONS GARBAGE COLLECTOR STARTED: %s.\n""" % (time.ctime(), )
# 1 - DELETE EXPIRED SESSIONS
if verbose: print "- deleting expired sessions"
timelimit = time.time()
if verbose >= 9: print """ DELETE FROM session WHERE session_expiry < %d \n""" % (timelimit, )
delcount['session'] += run_sql("""DELETE FROM session WHERE session_expiry < %s """ % (timelimit, ))
# 1b - DELETE GUEST USERS WITHOUT SESSION
if verbose: print "- deleting guest users without session"
# get uids
if verbose >= 9: print """ SELECT u.id\n FROM user AS u LEFT JOIN session AS s\n ON u.id = s.uid\n WHERE s.uid IS NULL AND u.email = ''"""
result = run_sql("""SELECT u.id
FROM user AS u LEFT JOIN session AS s
ON u.id = s.uid
WHERE s.uid IS NULL AND u.email = ''""")
if verbose >= 9: print result
if result:
# work on slices of result list in case of big result
for i in range(0, len(result), cfg_mysql_argumentlist_size):
# create string of uids
uidstr = ''
for (id_user, ) in result[i:i+cfg_mysql_argumentlist_size]:
if uidstr: uidstr += ','
uidstr += "%s" % (id_user, )
# delete users
if verbose >= 9: print """ DELETE FROM user WHERE id IN (TRAVERSE LAST RESULT) AND email = '' \n"""
delcount['user'] += run_sql("""DELETE FROM user WHERE id IN (%s) AND email = ''""" % (uidstr, ))
# 2 - DELETE QUERIES NOT ATTACHED TO ANY USER
# first step, delete from user_query
if verbose: print "- deleting user_queries referencing non-existent users"
# find user_queries referencing non-existent users
if verbose >= 9: print """ SELECT DISTINCT uq.id_user\n FROM user_query AS uq LEFT JOIN user AS u\n ON uq.id_user = u.id\n WHERE u.id IS NULL"""
result = run_sql("""SELECT DISTINCT uq.id_user
FROM user_query AS uq LEFT JOIN user AS u
ON uq.id_user = u.id
WHERE u.id IS NULL""")
if verbose >= 9: print result
# delete in user_query one by one
if verbose >= 9: print """ DELETE FROM user_query WHERE id_user = 'TRAVERSE LAST RESULT' \n"""
for (id_user, ) in result:
delcount['user_query'] += run_sql("""DELETE FROM user_query WHERE id_user = %s""" % (id_user, ))
# delete the actual queries
if verbose: print "- deleting queries not attached to any user"
# select queries that must be deleted
if verbose >= 9: print """ SELECT DISTINCT q.id\n FROM query AS q LEFT JOIN user_query AS uq\n ON uq.id_query = q.id\n WHERE uq.id_query IS NULL AND\n q.type <> 'p' """
result = run_sql("""SELECT DISTINCT q.id
FROM query AS q LEFT JOIN user_query AS uq
ON uq.id_query = q.id
WHERE uq.id_query IS NULL AND
q.type <> 'p'""")
if verbose >= 9: print result
# delete queries one by one
if verbose >= 9: print """ DELETE FROM query WHERE id = 'TRAVERSE LAST RESULT \n"""
for (id_user, ) in result:
delcount['query'] += run_sql("""DELETE FROM query WHERE id = %s""" % (id_user, ))
# 3 - DELETE BASKETS NOT OWNED BY ANY USER
if verbose: print "- deleting baskets not owned by any user"
# select basket ids
if verbose >= 9: print """ SELECT ub.id_basket\n FROM user_basket AS ub LEFT JOIN user AS u\n ON u.id = ub.id_user\n WHERE u.id IS NULL"""
result = run_sql("""SELECT ub.id_basket
FROM user_basket AS ub LEFT JOIN user AS u
ON u.id = ub.id_user
WHERE u.id IS NULL""")
if verbose >= 9: print result
# delete from user_basket and basket one by one
if verbose >= 9:
print """ DELETE FROM user_basket WHERE id_basket = 'TRAVERSE LAST RESULT' """
print """ DELETE FROM basket WHERE id = 'TRAVERSE LAST RESULT' \n"""
for (id_basket, ) in result:
delcount['user_basket'] += run_sql("""DELETE FROM user_basket WHERE id_basket = %s""" % (id_basket, ))
delcount['basket'] += run_sql("""DELETE FROM basket WHERE id = %s""" % (id_basket, ))
# 4 - DELETE ALERTS NOT OWNED BY ANY USER
if verbose: print '- deleting alerts not owned by any user'
# select user ids in uqb that reference non-existent users
if verbose >= 9: print """SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL"""
result = run_sql("""SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""")
if verbose >= 9: print result
# delete all these entries
for (id_user, ) in result:
if verbose >= 9: print """DELETE FROM user_query_basket WHERE id_user = 'TRAVERSE LAST RESULT """
delcount['user_query_basket'] += run_sql("""DELETE FROM user_query_basket WHERE id_user = %s """ % (id_user, ))
# PRINT STATISTICS
if verbose:
print """\nSTATISTICS - DELETED DATA: """
print """- %7s sessions.""" % (delcount['session'], )
print """- %7s users.""" % (delcount['user'], )
print """- %7s user_queries.""" % (delcount['user_query'], )
print """- %7s queries.""" % (delcount['query'], )
print """- %7s baskets.""" % (delcount['basket'], )
print """- %7s user_baskets.""" % (delcount['user_basket'], )
print """- %7s user_query_baskets.""" % (delcount['user_query_basket'], )
print """\nGUEST USER SESSIONS GARBAGE COLLECTOR FINISHED: %s. """ % (time.ctime(), )
print """\nEXECUTION LASTED %.2f SECONDS.\n""" % (time.time() - timelimit, )
return
def usage(exitcode=1, msg=""):
"""Prints usage info."""
if msg:
sys.stderr.write("Error: %s.\n" % msg)
sys.stderr.write("Usage: %s [options]\n" % sys.argv[0])
sys.stderr.write("General options:\n")
sys.stderr.write(" -h, --help \t\t Print this help.\n")
sys.stderr.write(" -V, --version \t\t Print version information.\n")
sys.stderr.write(" -v, --verbose=LEVEL \t Verbose level (0=min, 1=default, 9=max).\n")
sys.exit(exitcode)
def test_insertdata():
"""insert testdata for the garbage collector.
something will be deleted, other data kept.
test_checkdata() checks if the remains are correct."""
test_deletedata_nooutput()
print 'insert into session 6'
for (key, uid) in [('23A', 2000), ('24B', 2100), ('25C', 2200), ('26D', 2300)]:
run_sql("""INSERT INTO session (session_key, session_expiry, uid) values ('%s', %d, %s) """ % (key, time.time(), uid))
for (key, uid) in [('27E', 2400), ('28F', 2500)]:
run_sql("""INSERT INTO session (session_key, session_expiry, uid) values ('%s', %d, %s) """ % (key, time.time()+20000, uid))
print 'insert into user 6'
for id in range(2000, 2600, 100):
run_sql("""INSERT INTO user (id, email) values (%s, '') """ % (id, ))
print 'insert into user_query 6'
for (id_user, id_query) in [(2000, 155), (2100, 231), (2200, 155), (2300, 574), (2400, 155), (2500, 988)]:
run_sql("""INSERT INTO user_query (id_user, id_query) values (%s, %s) """ % (id_user, id_query))
print 'insert into query 4'
for (id, urlargs) in [(155, 'p=cern'), (231, 'p=muon'), (574, 'p=physics'), (988, 'cc=Atlantis+Institute+of+Science&as=0&p=')]:
run_sql("""INSERT INTO query (id, type, urlargs) values (%s, 'r', '%s') """ % (id, urlargs))
print 'insert into basket 4'
for (id, name) in [(6, 'general'), (7, 'physics'), (8, 'cern'), (9, 'diverse')]:
run_sql("""INSERT INTO basket (id, name, public) values (%s, '%s', 'n')""" % (id, name))
print 'insert into user_basket 4'
for (id_user, id_basket) in [(2000, 6), (2200, 7), (2200, 8), (2500, 9)]:
run_sql("""INSERT INTO user_basket (id_user, id_basket) values (%s, %s) """ % (id_user, id_basket))
print 'insert into user_query_basket 2'
for (id_user, id_query, id_basket) in [(2200, 155, 6), (2500, 988, 9)]:
run_sql("""INSERT INTO user_query_basket (id_user, id_query, id_basket) values (%s, %s, %s) """ % (id_user, id_query, id_basket))
def test_deletedata():
"""deletes all the testdata inserted in the insert function.
outputs how many entries are deleted"""
print 'delete from session',
print run_sql("DELETE FROM session WHERE uid IN (2000,2100,2200,2300,2400,2500) ")
print 'delete from user',
print run_sql("DELETE FROM user WHERE id IN (2000,2100,2200,2300,2400,2500) ")
print 'delete from user_query',
print run_sql("DELETE FROM user_query WHERE id_user IN (2000,2100,2200,2300,2400,2500) OR id_query IN (155,231,574,988) ")
print 'delete from query',
print run_sql("DELETE FROM query WHERE id IN (155,231,574,988) ")
print 'delete from basket',
print run_sql("DELETE FROM basket WHERE id IN (6,7,8,9) ")
print 'delete from user_basket',
print run_sql("DELETE FROM user_basket WHERE id_basket IN (6,7,8,9) OR id_user IN (2000, 2200, 2500) ")
print 'delete from user_query_basket',
print run_sql("DELETE FROM user_query_basket WHERE id_user IN (2200, 2500) ")
def test_deletedata_nooutput():
"""same as test_deletedata without output."""
run_sql("DELETE FROM session WHERE uid IN (2000,2100,2200,2300,2400,2500) ")
run_sql("DELETE FROM user WHERE id IN (2000,2100,2200,2300,2400,2500) ")
run_sql("DELETE FROM user_query WHERE id_user IN (2000,2100,2200,2300,2400,2500) OR id_query IN (155,231,574,988) ")
run_sql("DELETE FROM query WHERE id IN (155,231,574,988) ")
run_sql("DELETE FROM basket WHERE id IN (6,7,8,9) ")
run_sql("DELETE FROM user_basket WHERE id_basket IN (6,7,8,9) OR id_user IN (2000, 2200, 2500) ")
run_sql("DELETE FROM user_query_basket WHERE id_user IN (2200, 2500) ")
def test_showdata():
print '\nshow test data:'
print '\n- select * from session:'
for r in run_sql("SELECT * FROM session WHERE session_key IN ('23A','24B','25C','26D','27E','28F') "): print r
print '\n- select * from user:'
for r in run_sql("SELECT * FROM user WHERE email = '' AND id IN (2000,2100,2200,2300,2400,2500) "): print r
print '\n- select * from user_query:'
for r in run_sql("SELECT * FROM user_query WHERE id_user IN (2000,2100,2200,2300,2400,2500) "): print r
print '\n- select * from query:'
for r in run_sql("SELECT * FROM query WHERE id IN (155,231,574,988) "): print r
print '\n- select * from basket:'
for r in run_sql("SELECT * FROM basket WHERE id IN (6,7,8,9) "): print r
print '\n- select * from user_basket:'
for r in run_sql("SELECT * FROM user_basket WHERE id_basket IN (6,7,8,9)"): print r
print '\n- select * from user_query_basket:'
for r in run_sql("SELECT * FROM user_query_basket WHERE id_basket IN (6,7,8,9) "): print r
def test_checkdata():
"""checks wether the data in the database is correct after
the garbage collector has run.
test_insertdata must have been run followed by the gc for this to be true."""
result = run_sql("SELECT DISTINCT session_key FROM session WHERE session_key IN ('23A','24B','25C','26D','27E','28F') ")
if len(result) != 2: return 0
for r in [('27E', ), ('28F', )]:
if r not in result: return 0
result = run_sql("SELECT id FROM user WHERE email = '' AND id IN (2000,2100,2200,2300,2400,2500) ")
if len(result) != 2: return 0
for r in [(2400, ), (2500, )]:
if r not in result: return 0
result = run_sql("SELECT DISTINCT id_user FROM user_query WHERE id_user IN (2000,2100,2200,2300,2400,2500) ")
if len(result) != 2: return 0
for r in [(2400, ), (2500, )]:
if r not in result: return 0
result = run_sql("SELECT id FROM query WHERE id IN (155,231,574,988) ")
if len(result) != 2: return 0
for r in [(155, ), (988, )]:
if r not in result: return 0
result = run_sql("SELECT id FROM basket WHERE id IN (6,7,8,9) ")
if len(result) != 1: return 0
for r in [(9, )]:
if r not in result: return 0
result = run_sql("SELECT id_user, id_basket FROM user_basket WHERE id_basket IN (6,7,8,9)")
if len(result) != 1: return 0
for r in [(2500, 9)]:
if r not in result: return 0
result = run_sql("SELECT id_user, id_query, id_basket FROM user_query_basket WHERE id_basket IN (6,7,8,9) ")
if len(result) != 1: return 0
for r in [(2500, 988, 9)]:
if r not in result: return 0
return 1
def test_runtest_guest_user_garbage_collector():
"""a test to see if the garbage collector works correctly."""
test_insertdata()
test_showdata()
guest_user_garbage_collector(verbose=9)
test_showdata()
if test_checkdata():
print '\n\nGARBAGE COLLECTOR CLEANED UP THE CORRECT DATA \n\n'
else:
print '\n\nERROR ERROR ERROR - WRONG DATA CLEANED - ERROR ERROR ERROR \n\n'
test_deletedata_nooutput()
return
def main():
"""CLI to the session garbage collector.
Gets arguments from sys.argv and dispatch to guest_user_garbage_collector"""
options = {}
options['verbose'] = 1
try:
opts, args = getopt.getopt(sys.argv[1:], "hVv:", ["help", "version", "verbose="])
except getopt.GetoptError, e:
usage(e)
try:
for opt in opts:
if opt[0] in ['-h', '--help']:
usage(0)
elif opt[0] in ['-V', '--version']:
print __version__
sys.exit(0)
elif opt[0] in ['-v', '--verbose']:
options['verbose'] = int(opt[1])
except StandardError, e:
usage(e)
guest_user_garbage_collector(**options)
return
if __name__ == '__main__':
main()

Event Timeline