
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
__lastupdated__ = "$Date$"
import os
import time
import re
import datetime
import cPickle
import calendar
from urllib import quote
from invenio import template
from invenio.config import \
CFG_SITE_NAME, \
CFG_WEBDIR, \
CFG_TMPDIR, \
CFG_SITE_URL, \
CFG_SITE_LANG
from invenio.webstat_config import CFG_WEBSTAT_CONFIG_PATH
from invenio.search_engine import get_alphabetically_ordered_collection_list
from invenio.dbquery import run_sql, wash_table_column_name
from invenio.bibsched import is_task_scheduled, get_task_ids_by_descending_date, get_task_options
# Imports handling key events
from invenio.webstat_engine import get_keyevent_trend_collection_population
from invenio.webstat_engine import get_keyevent_trend_search_frequency
from invenio.webstat_engine import get_keyevent_trend_search_type_distribution
from invenio.webstat_engine import get_keyevent_trend_download_frequency
from invenio.webstat_engine import get_keyevent_snapshot_apache_processes
from invenio.webstat_engine import get_keyevent_snapshot_bibsched_status
from invenio.webstat_engine import get_keyevent_snapshot_uptime_cmd
from invenio.webstat_engine import get_keyevent_snapshot_sessions
# Imports handling custom events
from invenio.webstat_engine import get_customevent_table
from invenio.webstat_engine import get_customevent_trend
from invenio.webstat_engine import get_customevent_dump
# Imports for handling output
from invenio.webstat_engine import create_graph_trend
from invenio.webstat_engine import create_graph_dump
# Imports for handling exports
from invenio.webstat_engine import export_to_python
from invenio.webstat_engine import export_to_csv
TEMPLATES = template.load('webstat')
# Constants
WEBSTAT_CACHE_INTERVAL = 600 # Seconds. Does not affect the cache_* functions,
                             # nor does it consider whether BibSched is running
                             # a webstatadmin process.
WEBSTAT_RAWDATA_DIRECTORY = CFG_TMPDIR + "/"
WEBSTAT_GRAPH_DIRECTORY = CFG_WEBDIR + "/img/"
TYPE_REPOSITORY = [ ('gnuplot', 'Image - Gnuplot'),
('asciiart', 'Image - ASCII art'),
('asciidump', 'Image - ASCII dump'),
('python', 'Data - Python code', export_to_python),
('csv', 'Data - CSV', export_to_csv) ]
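# Note on the convention above: display-only types are 2-tuples, while export
# types carry a third element, the export closure. _is_type_export() and
# _get_export_closure() below rely on this tuple-length convention.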
# Key event repository, add an entry here to support new key measures.
KEYEVENT_REPOSITORY = { 'collection population':
{ 'fullname': 'Collection population',
'specificname': 'Population in collection "%(collection)s"',
'gatherer': get_keyevent_trend_collection_population,
'extraparams': {'collection': ('Collection', get_alphabetically_ordered_collection_list)},
'cachefilename': 'webstat_%(id)s_%(collection)s_%(timespan)s',
'ylabel': 'Number of records',
'multiple': None,
},
'search frequency':
{ 'fullname': 'Search frequency',
'specificname': 'Search frequency',
'gatherer': get_keyevent_trend_search_frequency,
'extraparams': {},
'cachefilename': 'webstat_%(id)s_%(timespan)s',
'ylabel': 'Number of searches',
'multiple': None,
},
'search type distribution':
{ 'fullname': 'Search type distribution',
'specificname': 'Search type distribution',
'gatherer': get_keyevent_trend_search_type_distribution,
'extraparams': {},
'cachefilename': 'webstat_%(id)s_%(timespan)s',
'ylabel': 'Number of searches',
'multiple': ['Simple searches', 'Advanced searches'],
},
'download frequency':
{ 'fullname': 'Download frequency',
'specificname': 'Download frequency',
'gatherer': get_keyevent_trend_download_frequency,
'extraparams': {},
'cachefilename': 'webstat_%(id)s_%(timespan)s',
'ylabel': 'Number of downloads',
'multiple': None,
}
}
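# A sketch of what a new entry could look like (the gatherer
# get_keyevent_trend_comment_frequency is hypothetical; a real entry must
# point to an existing gatherer in webstat_engine):
#
#     'comment frequency':
#         { 'fullname': 'Comment frequency',
#           'specificname': 'Comment frequency',
#           'gatherer': get_keyevent_trend_comment_frequency,
#           'extraparams': {},
#           'cachefilename': 'webstat_%(id)s_%(timespan)s',
#           'ylabel': 'Number of comments',
#           'multiple': None,
#         }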
# CLI
def create_customevent(id=None, name=None, cols=[]):
"""
Creates a new custom event by setting up the necessary MySQL tables.
@param id: Proposed human-readable id of the new event.
@type id: str
@param name: Optionally, a descriptive name.
@type name: str
@param cols: Optionally, the name of the additional columns.
@type cols: [str]
@return: A status message
@type: str
"""
if id is None:
return "Please specify a human-readable ID for the event."
# Only accept id and name with standard characters
    if re.search(r"[^\w]", str(id) + str(name)) is not None:
        return "Please note that both the event id and the event name need to be written without any non-standard characters."
# Make sure the chosen id is not already taken
if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) != 0:
return "Event id [%s] already exists! Aborted." % id
# Check if the cols are valid titles
for argument in cols:
if (argument == "creation_time") or (argument == "id"):
return "Invalid column title: %s! Aborted." % argument
# Insert a new row into the events table describing the new event
sql_param = [id]
if name is not None:
sql_name = "%s"
sql_param.append(name)
else:
sql_name = "NULL"
if len(cols) != 0:
sql_cols = "%s"
sql_param.append(cPickle.dumps(cols))
else:
sql_cols = "NULL"
run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name + ", " + sql_cols + ")",
tuple(sql_param))
tbl_name = get_customevent_table(id)
# Create a table for the new event
sql_query = ["CREATE TABLE %s (" % tbl_name]
sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
for argument in cols:
arg = wash_table_column_name(argument)
sql_query.append("%s MEDIUMTEXT NULL," % arg)
sql_query.append("INDEX %s (%s(50))," % (arg, arg))
sql_query.append("PRIMARY KEY (id))")
sql_str = ' '.join(sql_query)
run_sql(sql_str)
# We're done! Print notice containing the name of the event.
return ("Event table [%s] successfully created.\n" +
"Please use event id [%s] when registering an event.") % (tbl_name, id)
def modify_customevent(id=None, name=None, cols=[]):
"""
Modify a custom event. It can modify the columns definition
or/and the descriptive name
@param id: Human-readable id of the event.
@type id: str
@param name: Optionally, a descriptive name.
@type name: str
@param cols: Optionally, the name of the additional columns.
@type cols: [str]
@return: A status message
@type: str
"""
if id is None:
return "Please specify a human-readable ID for the event."
# Only accept name with standard characters
    if re.search(r"[^\w]", str(name)) is not None:
        return "Please note that the event name needs to be written without any non-standard characters."
# Check if the cols are valid titles
for argument in cols:
if (argument == "creation_time") or (argument == "id"):
return "Invalid column title: %s! Aborted." % argument
    res = run_sql("SELECT CONCAT('staEVENT', number), cols FROM staEVENT WHERE id = %s", (id,))
    if not res:
        return "Event id [%s] doesn't exist! Aborted." % id
    cols_orig = res[0][1] and cPickle.loads(res[0][1]) or []
# add new cols
cols_add = []
for col in cols:
if not col in cols_orig:
cols_add.append(col)
# del old cols
cols_del = []
for col in cols_orig:
if not col in cols:
cols_del.append(col)
#modify event table
if cols_del or cols_add:
sql_query = ["ALTER TABLE %s " % res[0][0]]
for col in cols_del:
sql_query.append("DROP COLUMN `%s`" % col)
sql_query.append(", ")
for col in cols_add:
sql_query.append("ADD COLUMN `%s` MEDIUMTEXT NULL, " % col)
sql_query.append("ADD INDEX `%s` (`%s`(50))" % (col, col))
sql_query.append(", ")
sql_query[-1] = ";"
run_sql("".join(sql_query))
#modify event definition
sql_query = ["UPDATE staEVENT SET"]
sql_param = []
if cols_del or cols_add:
sql_query.append("cols = %s")
sql_query.append(",")
sql_param.append(cPickle.dumps(cols))
if name:
sql_query.append("name = %s")
sql_query.append(",")
sql_param.append(name)
    # Only touch the database if something was actually modified
    if sql_param:
        sql_query[-1] = "WHERE id = %s"
        sql_param.append(id)
        run_sql(' '.join(sql_query), sql_param)
# We're done! Print notice containing the name of the event.
return ("Event table [%s] successfully modified." % (id,))
def destroy_customevent(id=None):
"""
Removes an existing custom event by destroying the MySQL tables and
the event data that might be around. Use with caution!
@param id: Human-readable id of the event to be removed.
@type id: str
@return: A status message
@type: str
"""
if id is None:
return "Please specify an existing event id."
# Check if the specified id exists
if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) == 0:
return "Event id [%s] doesn't exist! Aborted." % id
else:
tbl_name = get_customevent_table(id)
run_sql("DROP TABLE %s" % tbl_name)
run_sql("DELETE FROM staEVENT WHERE id = %s", (id,))
return ("Event with id [%s] was successfully destroyed.\n" +
"Table [%s], with content, was destroyed.") % (id, tbl_name)
def register_customevent(id, *arguments):
"""
Registers a custom event. Will add to the database's event tables
as created by create_customevent().
This function constitutes the "function hook" that should be
called throughout CDS Invenio where one wants to register a
custom event! Refer to the help section on the admin web page.
@param id: Human-readable id of the event to be registered
@type id: str
@param *arguments: The rest of the parameters of the function call
@type *arguments: [params]
"""
res = run_sql("SELECT CONCAT('staEVENT', number),cols FROM staEVENT WHERE id = %s", (id,))
if not res:
return # the id does not exist
tbl_name = res[0][0]
if res[0][1]:
col_titles = cPickle.loads(res[0][1])
else:
col_titles = []
if len(col_titles) != len(arguments[0]):
        return # the number of arguments differs from the number of columns
# Make sql query
if len(arguments[0]) != 0:
sql_param = []
sql_query = ["INSERT INTO %s (" % tbl_name]
for title in col_titles:
sql_query.append("`%s`" % title)
sql_query.append(",")
sql_query.pop() # del the last ','
sql_query.append(") VALUES (")
for argument in arguments[0]:
sql_query.append("%s")
sql_query.append(",")
sql_param.append(argument)
sql_query.pop() # del the last ','
sql_query.append(")")
sql_str = ''.join(sql_query)
run_sql(sql_str, tuple(sql_param))
else:
run_sql("INSERT INTO %s () VALUES ()" % tbl_name)
def cache_keyevent_trend(ids=[]):
"""
Runs the rawdata gatherer for the specific key events.
    Intended to be run mainly by the BibSched daemon interface.
For a specific id, all possible timespans' rawdata is gathered.
@param ids: The key event ids that are subject to caching.
@type ids: []
"""
args = {}
timespans = _get_timespans()
for id in ids:
args['id'] = id
extraparams = KEYEVENT_REPOSITORY[id]['extraparams']
        # Construct all combinations of extraparams and store them as [{param name: arg value}],
        # so that we can loop over them and pattern-replace each dictionary against
        # KEYEVENT_REPOSITORY[id]['cachefilename'].
combos = [[]]
for x in [[(param, x[0]) for x in extraparams[param][1]()] for param in extraparams]:
combos = [i + [y] for y in x for i in combos]
combos = [dict(x) for x in combos]
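        # For example (hypothetical values): an extraparams of the form
        # {'collection': ('Collection', f)}, where f() yields the collections
        # 'Atlantis' and 'Books', gives combos == [{'collection': 'Atlantis'},
        # {'collection': 'Books'}]; with no extraparams, combos == [{}].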
for i in range(len(timespans)):
# Get timespans parameters
args['timespan'] = timespans[i][0]
args.update({ 't_start': timespans[i][2], 't_end': timespans[i][3], 'granularity': timespans[i][4],
't_format': timespans[i][5], 'xtic_format': timespans[i][6] })
for combo in combos:
args.update(combo)
# Create unique filename for this combination of parameters
filename = KEYEVENT_REPOSITORY[id]['cachefilename'] \
% dict([(param, re.subn("[^\w]", "_", args[param])[0]) for param in args])
# Create closure of gatherer function in case cache needs to be refreshed
gatherer = lambda: KEYEVENT_REPOSITORY[id]['gatherer'](args)
# Get data file from cache, ALWAYS REFRESH DATA!
_get_file_using_cache(filename, gatherer, True).read()
return True
def cache_customevent_trend(ids=[]):
"""
Runs the rawdata gatherer for the specific custom events.
    Intended to be run mainly by the BibSched daemon interface.
For a specific id, all possible timespans' rawdata is gathered.
@param ids: The custom event ids that are subject to caching.
@type ids: []
"""
args = {}
timespans = _get_timespans()
for id in ids:
args['id'] = id
args['cols'] = []
for i in range(len(timespans)):
# Get timespans parameters
args['timespan'] = timespans[i][0]
args.update({ 't_start': timespans[i][2], 't_end': timespans[i][3], 'granularity': timespans[i][4],
't_format': timespans[i][5], 'xtic_format': timespans[i][6] })
# Create unique filename for this combination of parameters
filename = "webstat_customevent_%(id)s_%(timespan)s" \
% { 'id': re.subn("[^\w]", "_", id)[0], 'timespan': re.subn("[^\w]", "_", args['timespan'])[0] }
# Create closure of gatherer function in case cache needs to be refreshed
gatherer = lambda: get_customevent_trend(args)
# Get data file from cache, ALWAYS REFRESH DATA!
_get_file_using_cache(filename, gatherer, True).read()
return True
def basket_display():
"""
Display basket statistics.
"""
tbl_name = get_customevent_table("baskets")
if not tbl_name:
# custom event baskets not defined, so return empty output:
return []
try:
res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
days = (res[-1][0] - res[0][0]).days + 1
public = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display_public'" % tbl_name)[0][0]
users = run_sql("SELECT COUNT(DISTINCT user) FROM %s" % tbl_name)[0][0]
adds = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'add'" % tbl_name)[0][0]
displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display' OR action = 'display_public'" % tbl_name)[0][0]
hits = adds + displays
average = hits / days
res = [("Basket page hits", hits)]
res.append((" Average per day", average))
res.append((" Unique users", users))
res.append((" Additions", adds))
res.append((" Public", public))
except IndexError:
res = []
return res
def alert_display():
"""
Display alert statistics.
"""
tbl_name = get_customevent_table("alerts")
if not tbl_name:
# custom event alerts not defined, so return empty output:
return []
try:
res = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
days = (res[-1][0] - res[0][0]).days + 1
res = run_sql("SELECT COUNT(DISTINCT user),COUNT(*) FROM %s" % tbl_name)
users = res[0][0]
hits = res[0][1]
displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'list'" % tbl_name)[0][0]
search = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display'" % tbl_name)[0][0]
average = hits / days
res = [("Alerts page hits", hits)]
res.append((" Average per day", average))
res.append((" Unique users", users))
res.append((" Displays", displays))
res.append((" Searches history display", search))
except IndexError:
res = []
return res
def get_url_customevent(url_dest, id, *arguments):
"""
    Get a URL that registers a custom event. Every time the URL is
    loaded, a custom event is registered, as with register_customevent().
    @param url_dest: URL to redirect to after the event has been registered
    @type url_dest: str
    @param id: Human-readable id of the event to be registered
    @type id: str
    @param *arguments: The rest of the parameters of the function call;
                       the special value "WEBSTAT_IP" tells webstat to
                       substitute the IP of the host requesting the URL
    @type *arguments: [params]
    @return: URL that registers the event when loaded
    @type: str
"""
return "%s/stats/customevent_register?id=%s&arg=%s&url=%s" % \
(CFG_SITE_URL, id, ','.join(arguments[0]), quote(url_dest))
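# Illustrative result (hypothetical destination URL and event id):
#
#     get_url_customevent('http://example.org/', 'loans', ['WEBSTAT_IP'])
#     # -> CFG_SITE_URL + '/stats/customevent_register?id=loans' +
#     #    '&arg=WEBSTAT_IP&url=http%3A//example.org/'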
# WEB
def perform_request_index(ln=CFG_SITE_LANG):
"""
    Displays some informative text, the health box, and the list of
key/custom events.
"""
from ConfigParser import ConfigParser
conf = ConfigParser()
conf.read(CFG_WEBSTAT_CONFIG_PATH)
out = TEMPLATES.tmpl_welcome(ln=ln)
# Prepare the health base data
health_indicators = []
now = datetime.datetime.now()
yesterday = (now-datetime.timedelta(days=1)).strftime("%Y-%m-%d")
today = now.strftime("%Y-%m-%d")
tomorrow = (now+datetime.timedelta(days=1)).strftime("%Y-%m-%d")
# Append session information to the health box
if conf.get("general", "visitors_box") == "True":
sess = get_keyevent_snapshot_sessions()
health_indicators.append(("Total active visitors", sum(sess)))
health_indicators.append((" Logged in", sess[1]))
health_indicators.append(None)
# Append searches information to the health box
if conf.get("general", "search_box") == "True":
args = { 't_start': today, 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d" }
searches = get_keyevent_trend_search_type_distribution(args)
health_indicators.append(("Searches since midnight", sum(searches[0][1])))
health_indicators.append((" Simple", searches[0][1][0]))
health_indicators.append((" Advanced", searches[0][1][1]))
health_indicators.append(None)
# Append new records information to the health box
if conf.get("general", "record_box") == "True":
args = { 'collection': CFG_SITE_NAME, 't_start': today, 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d" }
try: tot_records = get_keyevent_trend_collection_population(args)[0][1]
except IndexError: tot_records = 0
args = { 'collection': CFG_SITE_NAME, 't_start': yesterday, 't_end': today, 'granularity': "day", 't_format': "%Y-%m-%d" }
try: new_records = tot_records - get_keyevent_trend_collection_population(args)[0][1]
except IndexError: new_records = 0
health_indicators.append(("Total records", tot_records))
health_indicators.append((" New records since midnight", new_records))
health_indicators.append(None)
# Append status of BibSched queue to the health box
if conf.get("general", "bibsched_box") == "True":
bibsched = get_keyevent_snapshot_bibsched_status()
health_indicators.append(("BibSched queue", sum([x[1] for x in bibsched])))
for item in bibsched:
health_indicators.append((" " + item[0], str(item[1])))
health_indicators.append(None)
# Append basket stats to the health box
if conf.get("general", "basket_box") == "True":
health_indicators += basket_display()
health_indicators.append(None)
# Append alerts stats to the health box
if conf.get("general", "alert_box") == "True":
health_indicators += alert_display()
health_indicators.append(None)
# Append number of Apache processes to the health box
if conf.get("general", "apache_box") == "True":
health_indicators.append(("Apache processes", get_keyevent_snapshot_apache_processes()))
# Append uptime and load average to the health box
if conf.get("general", "uptime_box") == "True":
health_indicators.append(("Uptime cmd", get_keyevent_snapshot_uptime_cmd()))
# Display the health box
out += TEMPLATES.tmpl_system_health(health_indicators, ln=ln)
# Produce a list of the key statistics
out += TEMPLATES.tmpl_keyevent_list(ln=ln)
# Display the custom statistics
out += TEMPLATES.tmpl_customevent_list(_get_customevents(), ln=ln)
return out
def perform_display_keyevent(id=None, args={}, req=None, ln=CFG_SITE_LANG):
"""
    Display a key event using a certain output type over the given time span.
    @param id: The id of the key event that is to be displayed.
    @type id: str
@param args: { param name: argument value }
@type args: { str: str }
@param req: The Apache request object, necessary for export redirect.
@type req:
"""
# Get all the option lists: { parameter name: [(argument internal name, argument full name)]}
options = dict([(param,
(KEYEVENT_REPOSITORY[id]['extraparams'][param][0],
KEYEVENT_REPOSITORY[id]['extraparams'][param][1]())) for param in
KEYEVENT_REPOSITORY[id]['extraparams']] +
[('timespan', ('Time span', _get_timespans())), ('format', ('Output format', _get_formats()))])
# Order of options
order = [param for param in KEYEVENT_REPOSITORY[id]['extraparams']] + ['timespan', 'format']
# Build a dictionary for the selected parameters: { parameter name: argument internal name }
choosed = dict([(param, args[param]) for param in KEYEVENT_REPOSITORY[id]['extraparams']] +
[('timespan', args['timespan']), ('format', args['format'])])
# Send to template to prepare event customization FORM box
out = TEMPLATES.tmpl_keyevent_box(options, order, choosed, ln=ln)
# Arguments OK?
    # Check for existence. If nothing, only show the FORM box from above.
if len(choosed) == 0:
return out
# Make sure extraparams are valid, if any
for param in choosed:
if not choosed[param] in [x[0] for x in options[param][1]]:
return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".'
% options[param][0], ln=ln)
# Arguments OK beyond this point!
# Get unique name for caching purposes (make sure that the params used in the filename are safe!)
filename = KEYEVENT_REPOSITORY[id]['cachefilename'] \
% dict([(param, re.subn("[^\w]", "_", choosed[param])[0]) for param in choosed] +
[('id', re.subn("[^\w]", "_", id)[0])])
# Get time parameters from repository
    # TODO: this should quite possibly be lifted out (webstat_engine?); in any case, a cleaner repository structure is desirable
_, t_fullname, t_start, t_end, granularity, t_format, xtic_format = \
options['timespan'][1][[x[0] for x in options['timespan'][1]].index(choosed['timespan'])]
args = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
't_format': t_format, 'xtic_format': xtic_format }
for param in KEYEVENT_REPOSITORY[id]['extraparams']:
args[param] = choosed[param]
# Create closure of frequency function in case cache needs to be refreshed
gatherer = lambda: KEYEVENT_REPOSITORY[id]['gatherer'](args)
    # Determine if this particular file is due for scheduled caching; in that
    # case we must not allow refreshing of the rawdata.
allow_refresh = not _is_scheduled_for_cacheing(id)
# Get data file from cache (refresh if necessary)
data = eval(_get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh).read())
# If type indicates an export, run the export function and we're done
if _is_type_export(choosed['format']):
_get_export_closure(choosed['format'])(data, req)
return out
# Prepare the graph settings that are being passed on to grapher
settings = { "title": KEYEVENT_REPOSITORY[id]['specificname'] % choosed,
"xlabel": t_fullname + ' (' + granularity + ')',
"ylabel": KEYEVENT_REPOSITORY[id]['ylabel'],
"xtic_format": xtic_format,
"format": choosed['format'],
"multiple": KEYEVENT_REPOSITORY[id]['multiple'] }
return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)
def perform_display_customevent(ids=[], args={}, req=None, ln=CFG_SITE_LANG):
"""
Display custom events using a certain output type over the given time span.
@param ids: The ids for the custom events that are to be displayed.
@type ids: [str]
@param args: { param name: argument value }
@type args: { str: str }
@param req: The Apache request object, necessary for export redirect.
@type req:
"""
# Get all the option lists: { parameter name: [(argument internal name, argument full name)]}
cols_dict = _get_customevent_cols()
cols_dict['__header'] = 'Argument'
cols_dict['__none'] = []
options = { 'ids': ('Custom event', _get_customevents()),
'timespan': ('Time span', _get_timespans()),
'format': ('Output format', _get_formats(True)),
'cols': cols_dict }
# Build a dictionary for the selected parameters: { parameter name: argument internal name }
choosed = { 'ids': args['ids'], 'timespan': args['timespan'], 'format': args['format']}
# Calculate cols
index = []
for key in args.keys():
if key[:4] == 'cols':
index.append(key[4:])
index.sort()
choosed['cols'] = [ zip([""]+args['bool'+i], args['cols'+i], args['col_value'+i])
for i in index ]
# Send to template to prepare event customization FORM box
out = TEMPLATES.tmpl_customevent_box(options, choosed, ln=ln)
# Arguments OK?
# Make sure extraparams are valid, if any
for param in ['ids', 'timespan', 'format']:
legalvalues = [x[0] for x in options[param][1]]
if type(args[param]) is list:
            # If the argument is a list, like the content of 'ids', every value has to be checked
if len(args[param]) == 0:
return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
for arg in args[param]:
if not arg in legalvalues:
return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
else:
if not args[param] in legalvalues:
return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
# Fetch time parameters from repository
_, t_fullname, t_start, t_end, granularity, t_format, xtic_format = \
options['timespan'][1][[x[0] for x in options['timespan'][1]].index(choosed['timespan'])]
args_req = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
't_format': t_format, 'xtic_format': xtic_format }
data_unmerged = []
# ASCII dump data is different from the standard formats
if choosed['format'] == 'asciidump':
for i in [ str(j) for j in range(len(ids))]:
args['bool'+i].insert(0, "")
args_req['cols'+i] = zip(args['bool'+i], args['cols'+i], args['col_value'+i])
filename = "webstat_customevent_" + re.subn("[^\w]", "", ''.join(ids) + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in [ args['cols'+str(i)] for i in range(len(ids))]]) + "_asciidump")[0]
args_req['ids'] = ids
gatherer = lambda: get_customevent_dump(args_req)
data = eval(_get_file_using_cache(filename, gatherer).read())
else:
for id, i in [ (ids[i], str(i)) for i in range(len(ids))]:
# Calculate cols
args_req['cols'] = choosed['cols'][int(i)]
# Get unique name for the rawdata file (wash arguments!)
filename = "webstat_customevent_" + re.subn("[^\w]", "", id + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in args_req['cols']]))[0]
# Add the current id to the gatherer's arguments
args_req['id'] = id
# Prepare raw data gatherer, if cache needs refreshing.
gatherer = lambda: get_customevent_trend(args_req)
            # Determine if this particular file is due for scheduled caching; in that
            # case we must not allow refreshing of the rawdata.
allow_refresh = not _is_scheduled_for_cacheing(id)
# Get file from cache, and evaluate it to trend data
data_unmerged.append(eval(_get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh).read()))
# Merge data from the unmerged trends into the final destination
data = [(x[0][0], tuple([y[1] for y in x])) for x in zip(*data_unmerged)]
# If type indicates an export, run the export function and we're done
if _is_type_export(args['format']):
_get_export_closure(args['format'])(data, req)
return out
# Get full names, for those that have them
names = []
events = _get_customevents()
for id in ids:
temp = events[[x[0] for x in events].index(id)]
if temp[1] != None:
names.append(temp[1])
else:
names.append(temp[0])
# Generate a filename for the graph
filename = "tmp_webstat_customevent_" + ''.join([re.subn("[^\w]", "", id)[0] for id in ids]) + "_" + choosed['timespan']
settings = { "title": 'Custom event',
"xlabel": t_fullname + ' (' + granularity + ')',
"ylabel": "Action quantity",
"xtic_format": xtic_format,
"format": choosed['format'],
"multiple": (type(ids) is list) and names or [] }
return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)
def perform_display_customevent_help(ln=CFG_SITE_LANG):
"""Display the custom event help"""
return TEMPLATES.tmpl_customevent_help(ln=ln)
# INTERNALS
def _perform_display_event(data, name, settings, ln=CFG_SITE_LANG):
"""
Retrieves a graph.
@param data: The trend/dump data
@type data: [(str, str|int|(str|int,...))] | [(str|int,...)]
@param name: The name of the trend (to be used as basename of graph file)
@type name: str
@param settings: Dictionary of graph parameters
@type settings: dict
@return: The URL of the graph (ASCII or image)
@type: str
"""
path = WEBSTAT_GRAPH_DIRECTORY + "tmp_" + name
# Generate, and insert using the appropriate template
if settings["format"] != "asciidump":
create_graph_trend(data, path, settings)
if settings["format"] == "asciiart":
return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)
else:
if settings["format"] == "gnuplot":
try:
import Gnuplot
except ImportError:
return 'Gnuplot is not installed. Returning ASCII art.' +\
TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)
return TEMPLATES.tmpl_display_event_trend_image(settings["title"], path, ln=ln)
else:
path += "_asciidump"
create_graph_dump(data, path, settings)
return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)
def _get_customevents():
"""
Retrieves registered custom events from the database.
@return: [(internal name, readable name)]
@type: [(str, str)]
"""
return [(x[0], x[1]) for x in run_sql("SELECT id, name FROM staEVENT")]
def _get_timespans(dt=None):
"""
Helper function that generates possible time spans to be put in the
drop-down in the generation box. Computes possible years, and also some
    pre-defined simpler values. Some items in the returned list also tweak the
output graph, if any, since such values are closely related to the nature
of the time span.
@param dt: A datetime object indicating the current date and time
@type dt: datetime.datetime
@return: [(Internal name, Readable name, t_start, t_end, granularity, format, xtic_format)]
@type [(str, str, str, str, str, str, str)]
"""
if dt is None:
dt = datetime.datetime.now()
format = "%Y-%m-%d"
    # Helper function to return a timedelta object reflecting a diff of x days
d_diff = lambda x: datetime.timedelta(days=x)
# Helper function to return the number of days in the month x months ago
d_in_m = lambda x: calendar.monthrange(((dt.month-x<1) and dt.year-1 or dt.year),
(((dt.month-1)-x)%12+1))[1]
to_str = lambda x: x.strftime(format)
dt_str = to_str(dt)
spans = [("today", "Today",
dt_str,
to_str(dt+d_diff(1)),
"hour", format, "%H"),
("this week", "This week",
to_str(dt-d_diff(dt.weekday())),
to_str(dt+d_diff(1)),
"day", format, "%a"),
("last week", "Last week",
to_str(dt-d_diff(dt.weekday()+7)),
to_str(dt-d_diff(dt.weekday())),
"day", format, "%a"),
("this month", "This month",
to_str(dt-d_diff(dt.day)+d_diff(1)),
to_str(dt+d_diff(1)),
"day", format, "%d"),
("last month", "Last month",
to_str(dt-d_diff(d_in_m(1))-d_diff(dt.day)+d_diff(1)),
to_str(dt-d_diff(dt.day)+d_diff(1)),
"day", format, "%d"),
("last three months", "Last three months",
to_str(dt-d_diff(d_in_m(1))-d_diff(d_in_m(2))-d_diff(dt.day)+d_diff(1)),
dt_str,
"month", format, "%b"),
("last year", "Last year",
to_str(dt.replace(year=dt.year-1,month=(dt.month+1) % 12, day=1)),
to_str(dt.replace(month=(dt.month+1) % 12, day=1)),
"month", format, "%b")]
    # Get the first year as indicated by the contents of bibrec
try:
y1 = run_sql("SELECT creation_date FROM bibrec ORDER BY creation_date LIMIT 1")[0][0].year
except IndexError:
y1 = dt.year
y2 = time.localtime()[0]
diff_year = y2 - y1
if diff_year >= 2:
spans.append(("last 2 years", "Last 2 years",
to_str(dt.replace(year=dt.year-2, month=(dt.month+1) % 12)),
to_str(dt.replace(month=(dt.month+1) % 12)),
"month", format, "%b"))
    if diff_year >= 5:
        spans.append(("last 5 years", "Last 5 years",
                      to_str(dt.replace(year=dt.year-5, month=1, day=1)),
                      to_str(dt.replace(month=dt.month % 12 + 1, day=1)),
                      "year", format, "%Y"))
    if diff_year >= 10:
        spans.append(("last 10 years", "Last 10 years",
                      to_str(dt.replace(year=dt.year-10, month=1, day=1)),
                      to_str(dt.replace(month=dt.month % 12 + 1, day=1)),
                      "year", format, "%Y"))
spans.append(("full history", "Full history", str(y1), str(y2+1), "year", "%Y", "%Y"))
spans.extend([(str(x), str(x), str(x), str(x+1), "month", "%Y", "%b") for x in range(y2, y1-1, -1)])
return spans
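# For illustration: with dt = datetime.datetime(2008, 5, 15), the first span
# produced above would be
#
#     ("today", "Today", "2008-05-15", "2008-05-16", "hour", "%Y-%m-%d", "%H")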
def _get_formats(with_dump=False):
"""
Helper function to retrieve a CDS Invenio friendly list of all possible
output types (displaying and exporting) from the central repository as
    stored in TYPE_REPOSITORY at the top of this module.
@param with_dump: Optionally displays the custom-event only type 'asciidump'
@type with_dump: bool
@return: [(Internal name, Readable name)]
@type [(str, str)]
"""
# The third tuple value is internal
if with_dump:
return [(x[0], x[1]) for x in TYPE_REPOSITORY]
else:
return [(x[0], x[1]) for x in TYPE_REPOSITORY if x[0] != 'asciidump']
def _get_customevent_cols(id=""):
"""
    List all the different column names used by custom events.
@return: {id: [(internal name, readable name)]}
@type: {str: [(str, str)]}
"""
sql_str = "SELECT id,cols FROM staEVENT"
sql_param = []
if id:
        sql_str += " WHERE id = %s"
sql_param.append(id)
cols = {}
for x in run_sql(sql_str, sql_param):
if x[0]:
if x[1]:
cols[x[0]] = [ (name, name) for name in cPickle.loads(x[1]) ]
else:
cols[x[0]] = []
return cols
def _is_type_export(typename):
"""
Helper function that consults the central repository of types to determine
whether the input parameter represents an export type.
@param typename: Internal type name
@type typename: str
@return: Information whether a certain type exports data
@type: bool
"""
return len(TYPE_REPOSITORY[[x[0] for x in TYPE_REPOSITORY].index(typename)]) == 3
def _get_export_closure(typename):
"""
Helper function that for a certain type, gives back the corresponding export
closure.
@param typename: Internal type name
    @type typename: str
@return: Closure that exports data to the type's format
@type: function
"""
return TYPE_REPOSITORY[[x[0] for x in TYPE_REPOSITORY].index(typename)][2]
def _get_file_using_cache(filename, closure, force=False, allow_refresh=True):
"""
Uses the CDS Invenio cache, i.e. the tempdir, to see if there's a recent
cached version of the sought-after file in there. If not, use the closure to
    compute a new one, and return that instead. Relies on the CDS Invenio configuration
parameter WEBSTAT_CACHE_INTERVAL.
@param filename: The name of the file that might be cached
@type filename: str
    @param closure: A function that, when executed, returns the data to be
                    cached. It should return either a string, or something
                    that makes sense after being interpreted with str().
@type closure: function
    @param force: Override cache default value.
    @type force: bool
    @param allow_refresh: Allow a stale cache file to be refreshed.
    @type allow_refresh: bool
"""
# Absolute path to cached files, might not exist.
filename = os.path.normpath(WEBSTAT_RAWDATA_DIRECTORY + filename)
# Get the modification time of the cached file (if any).
try:
mtime = os.path.getmtime(filename)
except OSError:
# No cached version of this particular file exists, thus the modification
# time is set to 0 for easy logic below.
mtime = 0
    # Consider refreshing the cache if FORCE, or NO CACHE AT ALL, or THE CACHE EXISTS AND REFRESH IS ALLOWED.
if force or mtime == 0 or (mtime > 0 and allow_refresh):
# Is the file modification time recent enough?
if force or (time.time() - mtime > WEBSTAT_CACHE_INTERVAL):
# No! Use closure to compute new content
content = closure()
# Cache the data
open(filename, 'w').write(str(content))
# Return the (perhaps just) cached file
return open(filename, 'r')
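# Illustrative use, mirroring the gatherer call sites above (the filename is
# hypothetical; the closure only runs when the cache is stale or refresh is
# forced):
#
#     gatherer = lambda: get_customevent_trend(args)
#     data = eval(_get_file_using_cache('webstat_customevent_foo', gatherer).read())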
def _is_scheduled_for_cacheing(id):
"""
@param id: The event id
@type id: str
    @return: Indication of whether the event id is scheduled for BibSched execution.
@type: bool
"""
if not is_task_scheduled('webstatadmin'):
return False
# Get the task id
try:
task_id = get_task_ids_by_descending_date('webstatadmin', ['RUNNING', 'WAITING'])[0]
except IndexError:
return False
else:
args = get_task_options(task_id)
return id in (args['keyevents'] + args['customevents'])
