Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90673328
webstat.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 3, 18:46
Size
40 KB
Mime Type
text/x-python
Expires
Tue, Nov 5, 18:46 (2 d)
Engine
blob
Format
Raw Data
Handle
22117838
Attached To
R3600 invenio-infoscience
webstat.py
View Options
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# Version-control keyword placeholders kept as module metadata (presumably
# expanded by the version-control system on checkout -- TODO confirm).
__revision__ = "$Id$"
__lastupdated__ = "$Date$"
import
os
import
time
import
re
import
datetime
import
cPickle
import
calendar
from
urllib
import
quote
from
invenio
import
template
from
invenio.config
import
\
CFG_SITE_NAME
,
\
CFG_WEBDIR
,
\
CFG_TMPDIR
,
\
CFG_SITE_URL
,
\
CFG_SITE_LANG
from
invenio.webstat_config
import
CFG_WEBSTAT_CONFIG_PATH
from
invenio.search_engine
import
get_alphabetically_ordered_collection_list
from
invenio.dbquery
import
run_sql
,
wash_table_column_name
from
invenio.bibsched
import
is_task_scheduled
,
get_task_ids_by_descending_date
,
get_task_options
# Imports handling key events
from
invenio.webstat_engine
import
get_keyevent_trend_collection_population
from
invenio.webstat_engine
import
get_keyevent_trend_search_frequency
from
invenio.webstat_engine
import
get_keyevent_trend_search_type_distribution
from
invenio.webstat_engine
import
get_keyevent_trend_download_frequency
from
invenio.webstat_engine
import
get_keyevent_snapshot_apache_processes
from
invenio.webstat_engine
import
get_keyevent_snapshot_bibsched_status
from
invenio.webstat_engine
import
get_keyevent_snapshot_uptime_cmd
from
invenio.webstat_engine
import
get_keyevent_snapshot_sessions
# Imports handling custom events
from
invenio.webstat_engine
import
get_customevent_table
from
invenio.webstat_engine
import
get_customevent_trend
from
invenio.webstat_engine
import
get_customevent_dump
# Imports for handling outputting
from
invenio.webstat_engine
import
create_graph_trend
from
invenio.webstat_engine
import
create_graph_dump
# Imports for handling exports
from
invenio.webstat_engine
import
export_to_python
from
invenio.webstat_engine
import
export_to_csv
# Template object of the 'webstat' module; the perform_* functions and
# _perform_display_event() below render their output through it.
TEMPLATES = template.load('webstat')

# Constants
WEBSTAT_CACHE_INTERVAL = 600 # Seconds, cache_* functions not affected by this.
                             # Also not taking into account if BibSched has webstatadmin process.
# Directory prefixes (with trailing slash) built from the site configuration.
# WEBSTAT_GRAPH_DIRECTORY is prepended to graph file names in
# _perform_display_event().
WEBSTAT_RAWDATA_DIRECTORY = CFG_TMPDIR + "/"
WEBSTAT_GRAPH_DIRECTORY = CFG_WEBDIR + "/img/"

# Output formats: (internal name, readable name[, export function]).
# Only the two 'Data' entries carry a third element, the export function.
# NOTE(review): presumably consumed by _get_formats(), _is_type_export() and
# _get_export_closure(), which are not visible here -- confirm.
TYPE_REPOSITORY = [('gnuplot', 'Image - Gnuplot'),
                   ('asciiart', 'Image - ASCII art'),
                   ('asciidump', 'Image - ASCII dump'),
                   ('python', 'Data - Python code', export_to_python),
                   ('csv', 'Data - CSV', export_to_csv)]
# Key event repository, add an entry here to support new key measures.
#
# Each entry maps a key event id to a dictionary with these keys:
#   'fullname'      - readable name of the measure (not referenced in the
#                     functions visible here; presumably used by the
#                     templates -- confirm)
#   'specificname'  - format string for the graph title; it is filled in with
#                     the selected parameters in perform_display_keyevent()
#   'gatherer'      - function called with the argument dictionary to produce
#                     the raw data of the measure
#   'extraparams'   - { parameter name: (readable name, function returning the
#                     list of options) } for parameters besides timespan and
#                     output format
#   'cachefilename' - format string for the name of the cache file
#   'ylabel'        - passed on as the "ylabel" graph setting
#   'multiple'      - passed on as the "multiple" graph setting: None, or a
#                     list of names (presumably one per data series --
#                     confirm in webstat_engine)
KEYEVENT_REPOSITORY = { 'collection population':
                          { 'fullname': 'Collection population',
                            'specificname': 'Population in collection "%(collection)s"',
                            'gatherer': get_keyevent_trend_collection_population,
                            'extraparams': {'collection': ('Collection', get_alphabetically_ordered_collection_list)},
                            'cachefilename': 'webstat_%(id)s_%(collection)s_%(timespan)s',
                            'ylabel': 'Number of records',
                            'multiple': None,
                          },
                        'search frequency':
                          { 'fullname': 'Search frequency',
                            'specificname': 'Search frequency',
                            'gatherer': get_keyevent_trend_search_frequency,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of searches',
                            'multiple': None,
                          },
                        'search type distribution':
                          { 'fullname': 'Search type distribution',
                            'specificname': 'Search type distribution',
                            'gatherer': get_keyevent_trend_search_type_distribution,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of searches',
                            'multiple': ['Simple searches', 'Advanced searches'],
                          },
                        'download frequency':
                          { 'fullname': 'Download frequency',
                            'specificname': 'Download frequency',
                            'gatherer': get_keyevent_trend_download_frequency,
                            'extraparams': {},
                            'cachefilename': 'webstat_%(id)s_%(timespan)s',
                            'ylabel': 'Number of downloads',
                            'multiple': None,
                          }
                      }
# CLI
def create_customevent(id=None, name=None, cols=[]):
    """
    Creates a new custom event by setting up the necessary MySQL tables.

    A row describing the event is inserted into staEVENT and a dedicated
    event table (named by get_customevent_table()) is created, holding an
    auto-increment id, a creation timestamp and one indexed MEDIUMTEXT
    column per entry in cols.

    @param id: Proposed human-readable id of the new event.
    @type id: str
    @param name: Optionally, a descriptive name.
    @type name: str
    @param cols: Optionally, the name of the additional columns. The list
                 is only read, never modified.
    @type cols: [str]
    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept id and name with standard characters
    if re.search(r"[^\w]", str(id) + str(name)) is not None:
        return "Please note that both event id and event name needs to be written without any non-standard characters."

    # Make sure the chosen id is not already taken
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) != 0:
        return "Event id [%s] already exists! Aborted." % id

    # Check if the cols are valid titles: these two names are taken by the
    # fixed columns of every event table.
    for argument in cols:
        if argument in ("creation_time", "id"):
            return "Invalid column title: %s! Aborted." % argument

    # Wash the column names before anything is written to the database, so
    # that a name refused by wash_table_column_name() cannot leave behind a
    # staEVENT row without its event table.
    # NOTE(review): assumes wash_table_column_name() signals a bad name by
    # raising -- confirm in invenio.dbquery.
    washed_cols = [wash_table_column_name(argument) for argument in cols]

    # Insert a new row into the events table describing the new event.
    # Missing name/cols are stored as SQL NULL rather than as parameters.
    sql_param = [id]
    if name is not None:
        sql_name = "%s"
        sql_param.append(name)
    else:
        sql_name = "NULL"
    if len(cols) != 0:
        sql_cols = "%s"
        sql_param.append(cPickle.dumps(cols))
    else:
        sql_cols = "NULL"
    run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name + ", " + sql_cols + ")",
            tuple(sql_param))

    tbl_name = get_customevent_table(id)

    # Create a table for the new event
    sql_query = ["CREATE TABLE %s (" % tbl_name]
    sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
    sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
    for arg in washed_cols:
        sql_query.append("%s MEDIUMTEXT NULL," % arg)
        sql_query.append("INDEX %s (%s(50))," % (arg, arg))
    sql_query.append("PRIMARY KEY (id))")
    run_sql(' '.join(sql_query))

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully created.\n" +
            "Please use event id [%s] when registering an event.") % (tbl_name, id)
def modify_customevent(id=None, name=None, cols=[]):
    """
    Modify a custom event. It can modify the columns definition
    or/and the descriptive name.

    The given cols are compared with the columns stored for the event:
    stored columns missing from cols are dropped from the event table and
    new ones are added (so the default, empty cols drops every stored
    column).

    @param id: Human-readable id of the event.
    @type id: str
    @param name: Optionally, a descriptive name.
    @type name: str
    @param cols: Optionally, the name of the additional columns. The list
                 is only read, never modified.
    @type cols: [str]
    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept name with standard characters
    if re.search(r"[^\w]", str(name)) is not None:
        return "Please note that event name needs to be written without any non-standard characters."

    # Check if the cols are valid titles
    for argument in cols:
        if argument in ("creation_time", "id"):
            return "Invalid column title: %s! Aborted." % argument

    res = run_sql("SELECT CONCAT('staEVENT', number), cols FROM staEVENT WHERE id = %s", (id,))
    if not res:
        # Unknown event: report it instead of failing on res[0] below.
        return "Event id [%s] doesn't exist! Aborted." % id
    tbl_name = res[0][0]
    # cols is stored as NULL for events created without extra columns
    # (see create_customevent), which cPickle.loads() cannot handle.
    if res[0][1]:
        cols_orig = cPickle.loads(res[0][1])
    else:
        cols_orig = []

    # add new cols
    cols_add = [col for col in cols if col not in cols_orig]
    # del old cols
    cols_del = [col for col in cols_orig if col not in cols]

    # modify event table
    # NOTE(review): unlike create_customevent, the new column names are not
    # passed through wash_table_column_name() before being put into the SQL.
    if cols_del or cols_add:
        clauses = ["DROP COLUMN `%s`" % col for col in cols_del]
        for col in cols_add:
            clauses.append("ADD COLUMN `%s` MEDIUMTEXT NULL, " % col +
                           "ADD INDEX `%s` (`%s`(50))" % (col, col))
        run_sql("ALTER TABLE %s " % tbl_name + ", ".join(clauses) + ";")

    # modify event definition
    assignments = []
    sql_param = []
    if cols_del or cols_add:
        assignments.append("cols = %s")
        sql_param.append(cPickle.dumps(cols))
    if name:
        assignments.append("name = %s")
        sql_param.append(name)
    if sql_param:
        sql_param.append(id)
        sql_str = "UPDATE staEVENT SET " + " , ".join(assignments) + " WHERE id = %s"
        run_sql(sql_str, tuple(sql_param))

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully modified." % (id,))
def destroy_customevent(id=None):
    """
    Removes an existing custom event: drops the event's MySQL table and
    deletes its row from staEVENT, including any event data gathered so
    far. Use with caution!

    @param id: Human-readable id of the event to be removed.
    @type id: str
    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify an existing event id."

    # Nothing to destroy if the id is not registered
    registered = run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))
    if len(registered) == 0:
        return "Event id [%s] doesn't exist! Aborted." % id

    # Drop the data table first, then forget the event itself
    tbl_name = get_customevent_table(id)
    run_sql("DROP TABLE %s" % tbl_name)
    run_sql("DELETE FROM staEVENT WHERE id = %s", (id,))

    notice = ("Event with id [%s] was successfully destroyed.\n" +
              "Table [%s], with content, was destroyed.")
    return notice % (id, tbl_name)
def register_customevent(id, *arguments):
    """
    Registers a custom event. Will add to the database's event tables
    as created by create_customevent().

    This function constitutes the "function hook" that should be
    called throughout CDS Invenio where one wants to register a
    custom event! Refer to the help section on the admin web page.

    Nothing is registered (and nothing is reported) when the id is unknown
    or when the number of values differs from the number of columns defined
    for the event.

    @param id: Human-readable id of the event to be registered
    @type id: str
    @param *arguments: The first extra positional argument is the list of
                       column values, in the order of the event's columns.
                       It may be left out for events without columns.
    @type *arguments: [params]
    """
    res = run_sql("SELECT CONCAT('staEVENT', number),cols FROM staEVENT WHERE id = %s", (id,))
    if not res:
        return # the id does not exist
    tbl_name = res[0][0]
    # cols is NULL for events that were created without extra columns
    if res[0][1]:
        col_titles = cPickle.loads(res[0][1])
    else:
        col_titles = []

    # A call without the value list used to fail on arguments[0]; treat it
    # as "no values".
    if arguments:
        values = arguments[0]
    else:
        values = []
    if len(col_titles) != len(values):
        return # there is different number of arguments than cols

    # Make sql query
    if len(values) != 0:
        titles_sql = ",".join(["`%s`" % title for title in col_titles])
        placeholders_sql = ",".join(["%s"] * len(values))
        sql_str = "INSERT INTO %s (" % tbl_name + titles_sql + ") VALUES (" + placeholders_sql + ")"
        run_sql(sql_str, tuple(values))
    else:
        # No columns: insert a row carrying only the id and creation time
        run_sql("INSERT INTO %s () VALUES ()" % tbl_name)
def cache_keyevent_trend(ids=[]):
    """
    Runs the rawdata gatherer for the specific key events.
    Intended to be run mainly but the BibSched daemon interface.

    For a specific id, all possible timespans' rawdata is gathered, for
    every combination of the event's extra parameters.

    @param ids: The key event ids that are subject to caching. The list is
                only read, never modified.
    @type ids: []
    @return: Always True
    """
    timespans = _get_timespans()

    for id in ids:
        # Start from a fresh argument dictionary for every event, so that
        # the extra parameters of one event (e.g. 'collection') are not
        # handed to the gatherer of the next one.
        args = {'id': id}
        extraparams = KEYEVENT_REPOSITORY[id]['extraparams']

        # Construct all combinations of extraparams and store as [{param name: arg value}]
        # so as we can loop over them and just pattern-replace the each dictionary against
        # the KEYEVENT_REPOSITORY['id']['cachefilename'].
        choices_per_param = [[(param, option[0]) for option in extraparams[param][1]()]
                             for param in extraparams]
        combos = [[]]
        for choices in choices_per_param:
            combos = [partial + [choice] for choice in choices for partial in combos]
        combos = [dict(pairs) for pairs in combos]

        for timespan in timespans:
            # Get timespans parameters
            args['timespan'] = timespan[0]
            args.update({ 't_start': timespan[2], 't_end': timespan[3], 'granularity': timespan[4],
                          't_format': timespan[5], 'xtic_format': timespan[6] })

            for combo in combos:
                args.update(combo)

                # Create unique filename for this combination of parameters
                # (every value is washed down to word characters)
                filename = KEYEVENT_REPOSITORY[id]['cachefilename'] \
                            % dict([(param, re.subn(r"[^\w]", "_", args[param])[0]) for param in args])

                # Create closure of gatherer function in case cache needs to be refreshed.
                # NOTE(review): the closure reads id/args when it is called, which is
                # only correct if _get_file_using_cache() calls it right away -- confirm.
                gatherer = lambda: KEYEVENT_REPOSITORY[id]['gatherer'](args)

                # Get data file from cache, ALWAYS REFRESH DATA!
                _get_file_using_cache(filename, gatherer, True).read()

    return True
def cache_customevent_trend(ids=[]):
    """
    Gathers and caches the raw trend data of the given custom events.
    Intended to be run mainly by the BibSched daemon interface.

    Every event is gathered once per available timespan, without any
    column restriction, and the cached data is always refreshed.

    @param ids: The custom event ids that are subject to caching.
    @type ids: []
    @return: Always True
    """
    args = {}
    timespans = _get_timespans()

    for id in ids:
        args['id'] = id
        args['cols'] = []

        for span in timespans:
            # Timespan tuple layout: (internal name, readable name, t_start,
            # t_end, granularity, format, xtic_format)
            args['timespan'] = span[0]
            span_params = { 't_start': span[2], 't_end': span[3], 'granularity': span[4],
                            't_format': span[5], 'xtic_format': span[6] }
            args.update(span_params)

            # Unique cache file name, washed down to word characters
            washed = { 'id': re.subn("[^\w]", "_", id)[0],
                       'timespan': re.subn("[^\w]", "_", args['timespan'])[0] }
            filename = "webstat_customevent_%(id)s_%(timespan)s" % washed

            # Closure producing the raw data when the cache is refreshed
            gatherer = lambda: get_customevent_trend(args)

            # The third argument forces a refresh of the cached data
            _get_file_using_cache(filename, gatherer, True).read()

    return True
def basket_display():
    """
    Collect the basket statistics from the "baskets" custom event table.

    @return: List of (label, value) pairs; empty when the custom event is
             not defined or when an IndexError occurs while gathering
             (e.g. no event has been registered yet).
    @type: [(str, int)]
    """
    tbl_name = get_customevent_table("baskets")
    if not tbl_name:
        # custom event baskets not defined, so return empty output:
        return []

    try:
        stamps = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        # Number of days covered, first and last day included
        span = stamps[-1][0] - stamps[0][0]
        days = span.days + 1

        public = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display_public'" % tbl_name)[0][0]
        users = run_sql("SELECT COUNT(DISTINCT user) FROM %s" % tbl_name)[0][0]
        adds = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'add'" % tbl_name)[0][0]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display' OR action = 'display_public'" % tbl_name)[0][0]

        hits = adds + displays
        average = hits / days

        stats = [("Basket page hits", hits),
                 (" Average per day", average),
                 (" Unique users", users),
                 (" Additions", adds),
                 (" Public", public)]
    except IndexError:
        stats = []

    return stats
def alert_display():
    """
    Collect the alert statistics from the "alerts" custom event table.

    @return: List of (label, value) pairs; empty when the custom event is
             not defined or when an IndexError occurs while gathering
             (e.g. no event has been registered yet).
    @type: [(str, int)]
    """
    tbl_name = get_customevent_table("alerts")
    if not tbl_name:
        # custom event alerts not defined, so return empty output:
        return []

    try:
        stamps = run_sql("SELECT creation_time FROM %s ORDER BY creation_time" % tbl_name)
        # Number of days covered, first and last day included
        span = stamps[-1][0] - stamps[0][0]
        days = span.days + 1

        totals = run_sql("SELECT COUNT(DISTINCT user),COUNT(*) FROM %s" % tbl_name)
        users = totals[0][0]
        hits = totals[0][1]
        displays = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'list'" % tbl_name)[0][0]
        search = run_sql("SELECT COUNT(*) FROM %s WHERE action = 'display'" % tbl_name)[0][0]

        average = hits / days

        stats = [("Alerts page hits", hits),
                 (" Average per day", average),
                 (" Unique users", users),
                 (" Displays", displays),
                 (" Searches history display", search)]
    except IndexError:
        stats = []

    return stats
def get_url_customevent(url_dest, id, *arguments):
    """
    Get an url for registering a custom event. Every time the url is
    loaded, a custom event is registered as by register_customevent().

    @param url_dest: url to redirect to after registering the event
    @type url_dest: str
    @param id: Human-readable id of the event to be registered
    @type id: str
    @param *arguments: The first extra positional argument is the list of
                       the event's column values; it may be left out.
                       The value "WEBSTAT_IP" will tell webstat that here
                       should be the IP who request the url
    @type *arguments: [params]
    @return: url for register event
    @type: str
    """
    # A call without the value list used to fail on arguments[0]; treat it
    # as "no values".
    if arguments:
        values = arguments[0]
    else:
        values = []
    # Only the destination url is quoted; id and values are inserted as given.
    return "%s/stats/customevent_register?id=%s&arg=%s&url=%s" % \
           (CFG_SITE_URL, id, ','.join(values), quote(url_dest))
# WEB
def perform_request_index(ln=CFG_SITE_LANG):
    """
    Builds the index page: the welcome text, the health box, and the lists
    of key and custom events.

    Which parts of the health box are shown is controlled by the "*_box"
    options in the [general] section of the webstat configuration file;
    a part is shown when its option equals the string "True".

    @param ln: Language passed on to the templates.
    @return: The output of the templates, concatenated.
    """
    from ConfigParser import ConfigParser
    conf = ConfigParser()
    conf.read(CFG_WEBSTAT_CONFIG_PATH)

    def box_enabled(option):
        """Tell whether the given health box option is switched on."""
        return conf.get("general", option) == "True"

    out = TEMPLATES.tmpl_welcome(ln=ln)

    # Prepare the health base data. A None entry in the list separates the
    # boxes from each other.
    health_indicators = []
    now = datetime.datetime.now()
    day_format = "%Y-%m-%d"
    one_day = datetime.timedelta(days=1)
    yesterday = (now - one_day).strftime(day_format)
    today = now.strftime(day_format)
    tomorrow = (now + one_day).strftime(day_format)

    # Session information
    if box_enabled("visitors_box"):
        sess = get_keyevent_snapshot_sessions()
        health_indicators.append(("Total active visitors", sum(sess)))
        health_indicators.append((" Logged in", sess[1]))
        health_indicators.append(None)

    # Searches information
    if box_enabled("search_box"):
        args = { 't_start': today, 't_end': tomorrow, 'granularity': "day", 't_format': "%Y-%m-%d" }
        searches = get_keyevent_trend_search_type_distribution(args)
        health_indicators.append(("Searches since midnight", sum(searches[0][1])))
        health_indicators.append((" Simple", searches[0][1][0]))
        health_indicators.append((" Advanced", searches[0][1][1]))
        health_indicators.append(None)

    # New records information
    if box_enabled("record_box"):
        args = { 'collection': CFG_SITE_NAME, 't_start': today, 't_end': tomorrow,
                 'granularity': "day", 't_format': "%Y-%m-%d" }
        try:
            tot_records = get_keyevent_trend_collection_population(args)[0][1]
        except IndexError:
            tot_records = 0
        # Same measure one day earlier; the difference is today's growth
        args = { 'collection': CFG_SITE_NAME, 't_start': yesterday, 't_end': today,
                 'granularity': "day", 't_format': "%Y-%m-%d" }
        try:
            new_records = tot_records - get_keyevent_trend_collection_population(args)[0][1]
        except IndexError:
            new_records = 0
        health_indicators.append(("Total records", tot_records))
        health_indicators.append((" New records since midnight", new_records))
        health_indicators.append(None)

    # Status of the BibSched queue
    if box_enabled("bibsched_box"):
        bibsched = get_keyevent_snapshot_bibsched_status()
        health_indicators.append(("BibSched queue", sum([x[1] for x in bibsched])))
        for item in bibsched:
            health_indicators.append((" " + item[0], str(item[1])))
        health_indicators.append(None)

    # Basket stats
    if box_enabled("basket_box"):
        health_indicators.extend(basket_display())
        health_indicators.append(None)

    # Alerts stats
    if box_enabled("alert_box"):
        health_indicators.extend(alert_display())
        health_indicators.append(None)

    # Number of Apache processes
    if box_enabled("apache_box"):
        health_indicators.append(("Apache processes", get_keyevent_snapshot_apache_processes()))

    # Uptime and load average
    if box_enabled("uptime_box"):
        health_indicators.append(("Uptime cmd", get_keyevent_snapshot_uptime_cmd()))

    # Health box, then the key statistics, then the custom statistics
    out += TEMPLATES.tmpl_system_health(health_indicators, ln=ln)
    out += TEMPLATES.tmpl_keyevent_list(ln=ln)
    out += TEMPLATES.tmpl_customevent_list(_get_customevents(), ln=ln)

    return out
def perform_display_keyevent(id=None, args={}, req=None, ln=CFG_SITE_LANG):
    """
    Renders the customization form of one key event and, when the selected
    parameters are valid, the requested graph or export.

    @param id: The id of the key event (a key of KEYEVENT_REPOSITORY).
    @type id: str
    @param args: { param name: argument value }; has to contain 'timespan',
                 'format' and every extra parameter of the event.
    @type args: { str: str }
    @param req: The Apache request object, necessary for export redirect.
    @type req:
    @param ln: Language passed on to the templates.
    @return: The form box, followed by the graph output or by an error
             message when a parameter value is not valid.
    """
    event = KEYEVENT_REPOSITORY[id]
    extraparams = event['extraparams']

    # Option lists: { parameter name: (readable name, [(internal name, ...)]) }
    option_items = []
    for param in extraparams:
        option_items.append((param, (extraparams[param][0], extraparams[param][1]())))
    option_items.append(('timespan', ('Time span', _get_timespans())))
    option_items.append(('format', ('Output format', _get_formats())))
    options = dict(option_items)

    # Order in which the options are presented
    order = list(extraparams) + ['timespan', 'format']

    # Selected values: { parameter name: argument internal name }
    chosen_items = []
    for param in extraparams:
        chosen_items.append((param, args[param]))
    chosen_items.append(('timespan', args['timespan']))
    chosen_items.append(('format', args['format']))
    choosed = dict(chosen_items)

    # The customization FORM box is always part of the output
    out = TEMPLATES.tmpl_keyevent_box(options, order, choosed, ln=ln)

    # Nothing selected: only show the FORM box
    if len(choosed) == 0:
        return out

    # Every selected value has to be one of the offered internal names
    for param in choosed:
        legal_values = [x[0] for x in options[param][1]]
        if not choosed[param] in legal_values:
            return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".'
                                              % options[param][0], ln=ln)

    # Arguments OK beyond this point!

    # Unique name for caching purposes; every value is washed down to word
    # characters before it becomes part of the file name
    washed_items = [(param, re.subn("[^\w]", "_", choosed[param])[0]) for param in choosed]
    washed_items.append(('id', re.subn("[^\w]", "_", id)[0]))
    filename = event['cachefilename'] % dict(washed_items)

    # Look up the time parameters of the selected timespan
    # TODO: This should quite possibly be lifted out (webstat_engine?), in any case a cleaner repository
    timespans = options['timespan'][1]
    position = [x[0] for x in timespans].index(choosed['timespan'])
    _, t_fullname, t_start, t_end, granularity, t_format, xtic_format = timespans[position]

    # Arguments for the gatherer (a new dictionary, the parameter is not modified)
    args = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
             't_format': t_format, 'xtic_format': xtic_format }
    for param in extraparams:
        args[param] = choosed[param]

    # Closure producing the raw data in case the cache needs to be refreshed
    gatherer = lambda: event['gatherer'](args)

    # Rawdata that is scheduled for cacheing must not be refreshed from here
    allow_refresh = not _is_scheduled_for_cacheing(id)

    # Get data file from cache (refresh if necessary) and evaluate it
    cache_file = _get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh)
    data = eval(cache_file.read())

    # An export format hands the data to the export function and we're done
    if _is_type_export(choosed['format']):
        _get_export_closure(choosed['format'])(data, req)
        return out

    # Graph settings that are being passed on to the grapher
    settings = { "title": event['specificname'] % choosed,
                 "xlabel": t_fullname + ' (' + granularity + ')',
                 "ylabel": event['ylabel'],
                 "xtic_format": xtic_format,
                 "format": choosed['format'],
                 "multiple": event['multiple'] }

    return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)
def perform_display_customevent(ids=[], args={}, req=None, ln=CFG_SITE_LANG):
    """
    Display custom events using a certain output type over the given time span.

    The customization form is always rendered; the graph or export is added
    when the 'ids', 'timespan' and 'format' values in args are valid.

    @param ids: The ids for the custom events that are to be displayed.
    @type ids: [str]
    @param args: { param name: argument value }. Read here: 'ids',
                 'timespan', 'format' and, per selected event number N,
                 'colsN', 'boolN' and 'col_valueN'.
    @type args: { str: str }
    @param req: The Apache request object, necessary for export redirect.
    @type req:
    @param ln: Language passed on to the templates.
    @return: The form box, followed by the graph output or by an error
             message when a parameter value is not valid.
    """
    # Get all the option lists: { parameter name: [(argument internal name, argument full name)]}
    cols_dict = _get_customevent_cols()
    cols_dict['__header'] = 'Argument'
    cols_dict['__none'] = []
    options = { 'ids': ('Custom event', _get_customevents()),
                'timespan': ('Time span', _get_timespans()),
                'format': ('Output format', _get_formats(True)),
                'cols': cols_dict }

    # Build a dictionary for the selected parameters: { parameter name: argument internal name }
    choosed = { 'ids': args['ids'], 'timespan': args['timespan'], 'format': args['format']}
    # Calculate cols: collect the suffixes of all 'cols...' keys in args
    index = []
    for key in args.keys():
        if key[:4] == 'cols':
            index.append(key[4:])
    # NOTE(review): the suffixes are strings, so this sort is lexicographic
    # ("10" sorts before "2"), while choosed['cols'] is indexed by number
    # further down -- looks wrong for more than ten events, confirm.
    index.sort()
    # One list of (bool, col, col_value) tuples per suffix; the first tuple
    # gets "" as its bool element.
    choosed['cols'] = [zip([""]+args['bool'+i], args['cols'+i], args['col_value'+i]) for i in index]
    # Send to template to prepare event customization FORM box
    out = TEMPLATES.tmpl_customevent_box(options, choosed, ln=ln)

    # Arguments OK?
    # Make sure extraparams are valid, if any
    for param in ['ids', 'timespan', 'format']:
        legalvalues = [x[0] for x in options[param][1]]

        if type(args[param]) is list:
            # If the argument is a list, like the content of 'ids' every value has to be checked
            if len(args[param]) == 0:
                return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
            for arg in args[param]:
                if not arg in legalvalues:
                    return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)
        else:
            if not args[param] in legalvalues:
                return out + TEMPLATES.tmpl_error('Please specify a valid value for parameter "%s".' % options[param][0], ln=ln)

    # Fetch time parameters from repository
    _, t_fullname, t_start, t_end, granularity, t_format, xtic_format = \
        options['timespan'][1][[x[0] for x in options['timespan'][1]].index(choosed['timespan'])]
    args_req = { 't_start': t_start, 't_end': t_end, 'granularity': granularity,
                 't_format': t_format, 'xtic_format': xtic_format }

    data_unmerged = []

    # ASCII dump data is different from the standard formats
    if choosed['format'] == 'asciidump':
        # All events go into one request, with the columns stored per event
        # number. Note that the 'bool' lists inside args are modified in place.
        for i in [ str(j) for j in range(len(ids))]:
            args['bool'+i].insert(0, "")
            args_req['cols'+i] = zip(args['bool'+i], args['cols'+i], args['col_value'+i])
        # Cache file name; all non-word characters are removed from it
        filename = "webstat_customevent_" + re.subn("[^\w]", "", ''.join(ids) + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in [ args['cols'+str(i)] for i in range(len(ids))]]) + "_asciidump")[0]
        args_req['ids'] = ids
        gatherer = lambda: get_customevent_dump(args_req)
        # NOTE(review): eval() of the cache file content -- only acceptable
        # as long as that file is written by webstat itself.
        data = eval(_get_file_using_cache(filename, gatherer).read())
    else:
        # One cached trend per event, merged afterwards
        for id, i in [ (ids[i], str(i)) for i in range(len(ids))]:
            # Calculate cols
            args_req['cols'] = choosed['cols'][int(i)]

            # Get unique name for the rawdata file (wash arguments!)
            filename = "webstat_customevent_" + re.subn("[^\w]", "", id + "_" + choosed['timespan'] + "_" + '-'.join([ ':'.join(col) for col in args_req['cols']]))[0]

            # Add the current id to the gatherer's arguments
            args_req['id'] = id

            # Prepare raw data gatherer, if cache needs refreshing.
            gatherer = lambda: get_customevent_trend(args_req)

            # Determine if this particular file is due for scheduling cacheing, in that case we must not
            # allow refreshing of the rawdata.
            allow_refresh = not _is_scheduled_for_cacheing(id)

            # Get file from cache, and evaluate it to trend data
            # NOTE(review): eval() of the cache file content, see above.
            data_unmerged.append(eval(_get_file_using_cache(filename, gatherer, allow_refresh=allow_refresh).read()))

        # Merge data from the unmerged trends into the final destination:
        # the first element of each row is taken from the first trend, the
        # second elements of all trends are collected into a tuple
        data = [(x[0][0], tuple([y[1] for y in x])) for x in zip(*data_unmerged)]

    # If type indicates an export, run the export function and we're done
    if _is_type_export(args['format']):
        _get_export_closure(args['format'])(data, req)
        return out

    # Get full names, for those that have them
    names = []
    events = _get_customevents()
    for id in ids:
        temp = events[[x[0] for x in events].index(id)]
        if temp[1] != None:
            names.append(temp[1])
        else:
            names.append(temp[0])

    # Generate a filename for the graph
    filename = "tmp_webstat_customevent_" + ''.join([re.subn("[^\w]", "", id)[0] for id in ids]) + "_" + choosed['timespan']

    # "multiple" is the list of event names when ids is a list, else []
    settings = { "title": 'Custom event', "xlabel": t_fullname + ' (' + granularity + ')',
                 "ylabel": "Action quantity", "xtic_format": xtic_format, "format": choosed['format'],
                 "multiple": (type(ids) is list) and names or [] }

    return out + _perform_display_event(data, os.path.basename(filename), settings, ln=ln)
def perform_display_customevent_help(ln=CFG_SITE_LANG):
    """
    Renders the help page on custom events.

    @param ln: Language passed on to the template.
    @return: The output of the custom event help template.
    """
    help_page = TEMPLATES.tmpl_customevent_help(ln=ln)
    return help_page
# INTERNALS
def _perform_display_event(data, name, settings, ln=CFG_SITE_LANG):
    """
    Generates the graph (or dump) file for the data and renders it through
    the template matching the requested format.

    @param data: The trend/dump data
    @type data: [(str, str|int|(str|int,...))] | [(str|int,...)]
    @param name: The name of the trend (to be used as basename of graph file)
    @type name: str
    @param settings: Dictionary of graph parameters; "format" and "title"
                     are read here.
    @type settings: dict
    @param ln: Language passed on to the templates.
    @return: The template output presenting the graph (ASCII or image)
    @type: str
    """
    path = WEBSTAT_GRAPH_DIRECTORY + "tmp_" + name

    # Dumps have their own generator and file name suffix
    if settings["format"] == "asciidump":
        path += "_asciidump"
        create_graph_dump(data, path, settings)
        return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)

    # Every other format is a trend graph
    create_graph_trend(data, path, settings)

    if settings["format"] == "asciiart":
        return TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)

    if settings["format"] == "gnuplot":
        # Without the Gnuplot module, present the graph file as ASCII art
        try:
            import Gnuplot
        except ImportError:
            return 'Gnuplot is not installed. Returning ASCII art.' + \
                   TEMPLATES.tmpl_display_event_trend_ascii(settings["title"], path, ln=ln)

    return TEMPLATES.tmpl_display_event_trend_image(settings["title"], path, ln=ln)
def _get_customevents():
    """
    Reads the id and name of every custom event registered in staEVENT.

    @return: [(internal name, readable name)]
    @type: [(str, str)]
    """
    events = []
    for row in run_sql("SELECT id, name FROM staEVENT"):
        events.append((row[0], row[1]))
    return events
def _get_timespans(dt=None):
    """
    Helper function that generates possible time spans to be put in the
    drop-down in the generation box. Computes possible years, and also some
    pre-defined simpler values. Some items in the list returned also tweak
    the output graph, if any, since such values are closely related to the
    nature of the time span.

    @param dt: A datetime object indicating the current date and time;
               defaults to datetime.datetime.now()
    @type dt: datetime.datetime

    @return: [(Internal name, Readable name, t_start, t_end, granularity,
               format, xtic_format)]
    @type [(str, str, str, str, str, str, str)]
    """
    if dt is None:
        dt = datetime.datetime.now()

    range_format = "%Y-%m-%d"

    def to_str(moment):
        """Formats a date using the common range format."""
        return moment.strftime(range_format)

    def d_diff(days):
        """Returns a timedelta object reflecting a diff of the given days."""
        return datetime.timedelta(days=days)

    def d_in_m(months_ago):
        """Returns the number of days in the month `months_ago` months ago."""
        years_back, month_index = divmod(dt.month - 1 - months_ago, 12)
        return calendar.monthrange(dt.year + years_back, month_index + 1)[1]

    def moved_to(year, month, day=None):
        """
        Returns dt moved to the given year and month. Unless a day is given,
        the current day is kept and clamped to the length of the target
        month, so that e.g. the 31st never lands in a 30-day month.
        """
        if day is None:
            day = min(dt.day, calendar.monthrange(year, month)[1])
        return dt.replace(year=year, month=month, day=day)

    # Month following the current one. The year has to roll over in
    # December; a plain (dt.month + 1) % 12 yields the invalid month 0 in
    # November and stays in the current year in December.
    next_month = dt.month % 12 + 1
    next_month_year = dt.year + dt.month // 12

    tomorrow = dt + d_diff(1)
    monday = dt - d_diff(dt.weekday())
    first_of_month = dt - d_diff(dt.day - 1)
    first_of_last_month = first_of_month - d_diff(d_in_m(1))

    spans = [
        ("today", "Today",
         to_str(dt),
         to_str(tomorrow),
         "hour", range_format, "%H"),
        ("this week", "This week",
         to_str(monday),
         to_str(tomorrow),
         "day", range_format, "%a"),
        ("last week", "Last week",
         to_str(monday - d_diff(7)),
         to_str(monday),
         "day", range_format, "%a"),
        ("this month", "This month",
         to_str(first_of_month),
         to_str(tomorrow),
         "day", range_format, "%d"),
        ("last month", "Last month",
         to_str(first_of_last_month),
         to_str(first_of_month),
         "day", range_format, "%d"),
        ("last three months", "Last three months",
         to_str(first_of_last_month - d_diff(d_in_m(2))),
         to_str(dt),
         "month", range_format, "%b"),
        ("last year", "Last year",
         to_str(moved_to(next_month_year - 1, next_month, day=1)),
         to_str(moved_to(next_month_year, next_month, day=1)),
         "month", range_format, "%b"),
    ]

    # Get first year as indicated by the contents of bibrec
    try:
        y1 = run_sql("SELECT creation_date FROM bibrec ORDER BY creation_date LIMIT 1")[0][0].year
    except IndexError:
        # No records at all: the history starts this year.
        y1 = dt.year

    y2 = time.localtime()[0]
    diff_year = y2 - y1

    # Multi-year spans are only offered when the history is long enough.
    if diff_year >= 2:
        spans.append(("last 2 years", "Last 2 years",
                      to_str(moved_to(next_month_year - 2, next_month)),
                      to_str(moved_to(next_month_year, next_month)),
                      "month", range_format, "%b"))
    if diff_year >= 5:
        spans.append(("last 5 years", "Last 5 years",
                      to_str(moved_to(dt.year - 5, 1)),
                      to_str(moved_to(next_month_year, next_month)),
                      "year", range_format, "%Y"))
    if diff_year >= 10:
        spans.append(("last 10 years", "Last 10 years",
                      to_str(moved_to(dt.year - 10, 1)),
                      to_str(moved_to(next_month_year, next_month)),
                      "year", range_format, "%Y"))

    spans.append(("full history", "Full history",
                  str(y1), str(y2 + 1),
                  "year", "%Y", "%Y"))

    # One entry per calendar year, most recent first.
    for year in range(y2, y1 - 1, -1):
        spans.append((str(year), str(year),
                      str(year), str(year + 1),
                      "month", "%Y", "%b"))

    return spans
def _get_formats(with_dump=False):
    """
    Lists the output types (displaying and exporting) registered in the
    module-level TYPE_REPOSITORY, in a CDS Invenio friendly shape.

    @param with_dump: Whether to include the custom-event only type
                      'asciidump'
    @type with_dump: bool

    @return: [(Internal name, Readable name)]
    @type [(str, str)]
    """
    formats = []
    for entry in TYPE_REPOSITORY:
        if not with_dump and entry[0] == 'asciidump':
            continue
        # Any value after the first two is internal and not exposed.
        formats.append((entry[0], entry[1]))
    return formats
def _get_customevent_cols(id=""):
    """
    Lists the column names of the custom events.

    @param id: Optional custom event id; when given, only that event is
               looked up, otherwise all events are returned
    @type id: str

    @return: {id: [(internal name, readable name)]}
    @type: {str: [(str, str)]}
    """
    sql_str = "SELECT id,cols FROM staEVENT"
    sql_param = []
    if id:
        # The leading space matters: without it the clause is glued onto
        # the table name and the query becomes invalid.
        sql_str += " WHERE id = %s"
        sql_param.append(id)

    cols = {}
    for event_id, pickled_cols in run_sql(sql_str, sql_param):
        if not event_id:
            continue
        if pickled_cols:
            # NOTE(review): the column list is unpickled straight from the
            # database; this is only safe while staEVENT.cols is written
            # exclusively by trusted code.
            cols[event_id] = [(name, name)
                              for name in cPickle.loads(pickled_cols)]
        else:
            cols[event_id] = []
    return cols
def _is_type_export(typename):
    """
    Looks the type up in TYPE_REPOSITORY and tells whether it is an export
    type, i.e. whether its entry has exactly three values.

    @param typename: Internal type name
    @type typename: str

    @return: Information whether a certain type exports data
    @type: bool
    """
    internal_names = [entry[0] for entry in TYPE_REPOSITORY]
    # list.index raises ValueError for a type name that is not registered.
    position = internal_names.index(typename)
    entry = TYPE_REPOSITORY[position]
    return len(entry) == 3
def _get_export_closure(typename):
    """
    Looks the type up in TYPE_REPOSITORY and hands back the third value of
    its entry, the export closure.

    @param typename: Internal type name
    @type typename: str

    @return: Closure that exports data to the type's format
    @type: function
    """
    internal_names = [entry[0] for entry in TYPE_REPOSITORY]
    # list.index raises ValueError for a type name that is not registered.
    position = internal_names.index(typename)
    entry = TYPE_REPOSITORY[position]
    return entry[2]
def _get_file_using_cache(filename, closure, force=False, allow_refresh=True):
    """
    Uses the CDS Invenio cache, i.e. the tempdir, to see if there's a recent
    cached version of the sought-after file in there. If not, use the closure
    to compute a new one, and return that instead. Relies on the CDS Invenio
    configuration parameter WEBSTAT_CACHE_INTERVAL.

    @param filename: The name of the file that might be cached, relative to
                     WEBSTAT_RAWDATA_DIRECTORY
    @type filename: str

    @param closure: A function, that executed will return data to be cached.
                    The function should return either a string, or something
                    that makes sense after being interpreted with str().
    @type closure: function

    @param force: Recompute the content regardless of the cache state.
    @type force: bool

    @param allow_refresh: Whether an existing but outdated cache file may be
                          recomputed; a missing file is always computed.
    @type allow_refresh: bool

    @return: The cached file, opened for reading. Closing it is up to the
             caller.
    @type: file
    """
    # Absolute path to cached files, might not exist.
    filename = os.path.normpath(WEBSTAT_RAWDATA_DIRECTORY + filename)

    # Get the modification time of the cached file (if any).
    try:
        mtime = os.path.getmtime(filename)
    except OSError:
        # No cached version of this particular file exists, thus the
        # modification time is set to 0 for easy logic below.
        mtime = 0

    # Refresh if FORCED, if there is NO CACHE AT ALL, or if the cache EXISTS,
    # REFRESH IS ALLOWED and the file is older than the cache interval.
    outdated = time.time() - mtime > WEBSTAT_CACHE_INTERVAL
    if force or (outdated and (mtime == 0 or (mtime > 0 and allow_refresh))):
        # Use closure to compute new content
        content = closure()

        # Cache the data. The handle is closed explicitly so that the
        # content is flushed to disk before the file is reopened below.
        cache_file = open(filename, 'w')
        try:
            cache_file.write(str(content))
        finally:
            cache_file.close()

    # Return the (perhaps just) cached file
    return open(filename, 'r')
def _is_scheduled_for_cacheing(id):
    """
    Tells whether the given event is covered by the most recent running or
    waiting 'webstatadmin' BibSched task.

    @param id: The event id
    @type id: str

    @return: Indication of whether the event id is scheduled for BibSched
             execution.
    @type: bool
    """
    if not is_task_scheduled('webstatadmin'):
        return False

    # Take the first task id of the descending-date listing.
    try:
        newest_task = get_task_ids_by_descending_date(
            'webstatadmin', ['RUNNING', 'WAITING'])[0]
    except IndexError:
        return False

    options = get_task_options(newest_task)
    scheduled_events = options['keyevents'] + options['customevents']
    return id in scheduled_events
Event Timeline
Log In to Comment