Page MenuHomec4science

bibstat.in
No OneTemporary

File Metadata

Created
Fri, May 3, 15:09

bibstat.in

#!@PYTHON@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibStat reports some interesting numbers on the bibliographic record set.
"""
__revision__ = "$Id$"
## import interesting modules:
try:
import sys
from invenio.dbquery import run_sql, get_table_status_info
from invenio.config import CFG_DATABASE_HOST, CFG_DATABASE_PORT, \
CFG_DATABASE_NAME
import getopt
import time
except ImportError, e:
print "Error: %s" % e
import sys
sys.exit(1)
def report_table_status(tablename):
    """Return a one-line, fixed-width status summary for table TABLENAME.

    The line holds, in order, the 'Name', 'Rows', 'Data_length' and
    'Max_data_length' entries of the mapping returned by
    get_table_status_info(TABLENAME).  When that helper returns a false
    value (presumably because the table does not exist), the empty
    string is returned instead.
    """
    status = get_table_status_info(tablename)
    if not status:
        return ""
    # Column order of the report line; the three numeric entries are
    # rendered with %d, so they must be integers.
    columns = ('Name', 'Rows', 'Data_length', 'Max_data_length')
    return "%14s %17d %17d %17d" % tuple([status[key] for key in columns])
def report_definitions_of_physical_tags():
    """
    Print section 1 of the report: the physical MARC tag definitions.

    Reads every row of the `tag` table, ordered by tag value, and prints
    one "<value> ... <name>" line per row.  Returns nothing.
    """
    print("### 1 - PHYSICAL TAG DEFINITIONS")
    # print("") emits an empty line whether print is a statement or a
    # function; a bare `print` would only do so as a statement.
    print("")
    print("# MARC tag ... description")
    tag_rows = run_sql('SELECT id,value,name FROM tag ORDER BY value')
    # The id column is selected by the query but not shown in the report.
    for dummy_tag_id, marc_tag, description in tag_rows:
        print("%s ... %s" % (marc_tag, description))
def report_definitions_of_logical_fields():
    """
    Print section 2 of the report: the logical field definitions.

    For every row of the `field` table (ordered by code) one line is
    printed: the field code followed by a colon, then the values of all
    tags associated with that field through `field_tag`, separated by
    single spaces.  Returns nothing.
    """
    print("")
    print("### 2 - LOGICAL FIELD DEFINITIONS")
    print("")
    print("# logical field: associated physical tags")
    field_rows = run_sql('SELECT id,name,code FROM field ORDER BY code')
    # The field name is selected by the query but not shown in the report.
    for field_id, dummy_field_name, field_code in field_rows:
        tag_rows = run_sql("SELECT value FROM tag, field_tag\n"
                           "WHERE id_field=%s AND id_tag=id\n",
                           (field_id,))
        # Build the complete line first: "<code>:" then each tag value.
        pieces = ["%s:" % (field_code,)]
        for tag_row in tag_rows:
            pieces.append("%s" % (tag_row[0],))
        print(" ".join(pieces))
def report_definitions_of_indexes():
    """
    Print section 3 of the report: the index definitions.

    For every row of `idxINDEX` (ordered by name) one line is printed:
    the index name, with its stemming language appended in parentheses
    when one is set, followed by a colon and then the codes of all
    logical fields associated with the index through `idxINDEX_field`,
    separated by single spaces.  Returns nothing.
    """
    print("")
    print("### 3 - INDEX DEFINITIONS")
    print("")
    print("# index (stemming): associated logical fields")
    index_rows = run_sql("SELECT id,name,stemming_language FROM idxINDEX\n"
                         "ORDER BY name")
    for index_id, label, stemming in index_rows:
        if stemming:
            label += ' (%s)' % stemming
        code_rows = run_sql("SELECT code FROM field, idxINDEX_field\n"
                            "WHERE id_idxINDEX=%s AND id_field=id\n",
                            (index_id,))
        # Build the complete line first: "<label>:" then each field code.
        pieces = ["%s:" % (label,)]
        for code_row in code_rows:
            pieces.append("%s" % (code_row[0],))
        print(" ".join(pieces))
def report_on_all_bibliographic_tables():
    """Print section 4 of the report: one status line per table.

    The tables covered, in output order, are bib00x..bib99x (each
    immediately followed by its bibrec_bibNNx companion), then
    idxWORD00F/R..idxWORD10F/R, then idxPHRASE00F/R..idxPHRASE10F/R.
    report_table_status() yields an empty string when it has no status
    for a table, so such a table shows up as an empty line.
    """
    print("")
    print("### 4 - TABLE SPACE AND SIZE INFO")
    print("")
    # NOTE(review): the last column is labelled "INDEX SIZE", but
    # report_table_status() fills it with the 'Max_data_length' value --
    # confirm which of the two is intended.
    print("# %12s %17s %17s %17s" % ("TABLE", "ROWS", "DATA SIZE",
                                     "INDEX SIZE"))
    # Collect the table names in report order, then print them in one pass.
    table_names = []
    for tens in range(10):
        for units in range(10):
            table_names.append("bib%1d%1dx" % (tens, units))
            table_names.append("bibrec_bib%1d%1dx" % (tens, units))
    for number in range(11):
        table_names.append("idxWORD%02dF" % number)
        table_names.append("idxWORD%02dR" % number)
    for number in range(11):
        table_names.append("idxPHRASE%02dF" % number)
        table_names.append("idxPHRASE%02dR" % number)
    for table_name in table_names:
        print(report_table_status(table_name))
def report_tag_usage():
    """Print section 5 of the report: how widely each tag is used.

    For each of the tables bib00x..bib99x, every distinct tag stored in
    the table is listed together with the number of distinct records
    that carry it (found through the matching bibrec_bibNNx table) and
    the ids of up to nine of those records as examples.
    """
    print("")
    print("### 5 - TAG USAGE INFO")
    print("")
    print("# TAG NB_RECORDS\t# recID1 recID2 ... recID9 (example records)")
    for tens in range(10):
        for units in range(10):
            bibxxx = "bib%1d%1dx" % (tens, units)
            bibrec_bibxxx = 'bibrec_' + bibxxx
            # Table names are interpolated here; the tag value stays a
            # bound parameter (the doubled %%s survives as %s).
            usage_query = ("SELECT DISTINCT(b.id) FROM bibrec AS b,\n"
                           "%s AS bb, %s AS bx\n"
                           "WHERE b.id=bb.id_bibrec\n"
                           "AND bb.id_bibxxx=bx.id\n"
                           "AND bx.tag=%%s\n") % (bibrec_bibxxx, bibxxx)
            # detect all the various tags in use:
            tag_rows = run_sql("SELECT DISTINCT(tag) FROM %s" % (bibxxx,))
            for tag_row in tag_rows:
                tag = tag_row[0]
                # detect which records have this tag in use:
                record_rows = run_sql(usage_query, (tag,))
                padding = (8 - len(tag)) * ' '
                examples = " ".join([str(record_row[0])
                                     for record_row in record_rows[:9]])
                # Items are separated by one space, except that nothing
                # is inserted between the tab pair and '#': a print
                # statement writes no separator after an item ending in
                # whitespace other than ' '.
                print("%s %s %s \t\t# %s" % (tag, padding, len(record_rows),
                                             examples))
def report_header():
    """
    Print the opening banner of the report, followed by an empty line.

    The banner names the database as host:port.name, taken from the
    CFG_DATABASE_* settings, and the current local time as formatted by
    time.asctime().
    """
    database = (CFG_DATABASE_HOST, CFG_DATABASE_PORT, CFG_DATABASE_NAME)
    banner = ('### BIBSTAT REPORT FOR DB %s:%s.%s RUN AT %s'
              % (database + (time.asctime(),)))
    print(banner)
    print("")
def report_footer():
    """
    Print the closing part of the report: two empty lines followed by
    the end-of-report marker line.
    """
    # Empty strings (never a bare print) so that each entry yields one
    # empty line whether print is a statement or a function.
    for line in ("", "", '### END OF BIBSTAT REPORT'):
        print(line)
def usage(exitcode=1, msg=""):
    """Write the command-line help to stderr, then exit the process.

    When MSG is true it is reported first, as an "Error: MSG." line.
    The function always finishes with sys.exit(EXITCODE), so it never
    returns to its caller.
    """
    emit = sys.stderr.write
    if msg:
        emit("Error: %s.\n" % msg)
    emit("Usage: %s [options]\n" % sys.argv[0])
    option_help = (
        "General options:\n",
        " -h, --help \t\t Print this help.\n",
        " -V, --version \t\t Print version information.\n",
    )
    for help_line in option_help:
        emit(help_line)
    sys.exit(exitcode)
def main():
    """Parse the command line and print the full bibstat report.

    Recognised options:
      -h, --help     print the usage text and exit with status 0
      -V, --version  print __revision__ and exit with status 0
    An option that getopt rejects prints the usage text together with
    the parser's error and exits with status 1.  When no option is
    given, every report section is printed in order, from header to
    footer.  Positional arguments are accepted but ignored.
    """
    try:
        parsed_opts, dummy_args = getopt.getopt(sys.argv[1:], "hV",
                                                ["help", "version"])
    except getopt.GetoptError:
        # sys.exc_info()[1] is the exception being handled; usage()
        # exits the process, so execution does not continue past it.
        usage(1, sys.exc_info()[1])
    if not parsed_opts:
        report_header()
        report_definitions_of_physical_tags()
        report_definitions_of_logical_fields()
        report_definitions_of_indexes()
        report_on_all_bibliographic_tables()
        report_tag_usage()
        report_footer()
        return
    for flag, dummy_value in parsed_opts:
        if flag in ("-h", "--help"):
            usage(0)
        elif flag in ("-V", "--version"):
            print(__revision__)
            sys.exit(0)
        else:
            # getopt only returns the options declared above, so this
            # branch is a safety net.
            usage(1)
# Produce the report only when this file is executed as a script; merely
# importing it must not run main().
if __name__ == "__main__":
    main()

Event Timeline