Page MenuHomec4science

kwalitee.py
No OneTemporary

File Metadata

Created
Fri, Nov 1, 18:10

kwalitee.py

## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Check the kwalitee of the CDS Invenio Python code.
Q: What is kwalitee?
A: <http://qa.perl.org/phalanx/kwalitee.html>
Usage: python kwalitee.py <topsrcdir | file1.py file2.py ...>
Examples:
$ python kwalitee.py ~/src/cds-invenio/
$ python kwalitee.py ../../websearch/lib/*.py
"""
import os
import re
import subprocess
import sys
import time
# Revision marker of this file (presumably a keyword placeholder that
# the revision control system expands -- confirm).
__revision__ = "$Id$"
# Verbosity level: when 9 or higher, the pylint and pychecker helpers
# below print the result they computed for every file.
verbose = 0
def get_list_of_python_code_files(modulesdir, modulename):
    """Return list of Python source code files for MODULENAME in MODULESDIR,
    excluding test files.

    Collected are all ``*.py`` files found anywhere below
    MODULESDIR/MODULENAME/ plus all ``*.in`` files found anywhere below
    MODULESDIR/MODULENAME/bin/.  Removed again are paths ending with
    ``Makefile.in``, ``dbexec.in`` or ``_tests.py`` and paths that
    contain ``/z_``.  The returned list is sorted.

    The tree is walked in-process with os.walk() instead of running
    ``find`` through a shell, so directory names with spaces or shell
    metacharacters work and no pipes are left open.  A missing
    directory simply contributes no files.
    """
    def find_files(topdir, suffix):
        """Return paths of non-directory entries below TOPDIR whose
        name ends with SUFFIX."""
        found = []
        for dirpath, dummy_dirnames, filenames in os.walk(topdir):
            for filename in filenames:
                if filename.endswith(suffix):
                    found.append(os.path.join(dirpath, filename))
        return found

    moduledir = "%s/%s/" % (modulesdir, modulename)
    # firstly, find out *.py files:
    out = find_files(moduledir, ".py")
    # secondly, find out bin/*.in files:
    out.extend(find_files(moduledir + "bin/", ".in"))
    # last, remove Makefile, test files, z_ files:
    out = [path for path in out
           if not path.endswith("Makefile.in")
           and not path.endswith("dbexec.in")
           and not path.endswith("_tests.py")
           and "/z_" not in path]
    # sort so that reports do not depend on directory listing order:
    out.sort()
    return out
def wash_list_of_python_files_for_pylinting(filenames):
    """Return the entries of the list FILENAMES that are suitable for
    pylinting.

    Kept are only names ending with ``.py``.  Of those, the following
    are dropped: ``__init__.py`` files, the unloadable bibformat test
    file ``bfe_test_4.py``, the unloadable test file ``test3.py`` and
    the no-docstring test file ``test_5.py`` (all matched by name
    ending).
    """
    # FIXME: __init__.py files are dropped because they are empty; we
    # may check for file size here in case we shall have non-empty
    # __init__.py files one day
    unsuitable_endings = ["__init__.py",
                          "bfe_test_4.py",
                          "test3.py",
                          "test_5.py"]
    washed = []
    for filename in filenames:
        # take only .py files for pylinting:
        if not filename.endswith(".py"):
            continue
        is_unsuitable = False
        for ending in unsuitable_endings:
            if filename.endswith(ending):
                is_unsuitable = True
                break
        if not is_unsuitable:
            washed.append(filename)
    return washed
def get_list_of_python_unit_test_files(modulesdir, modulename):
    """Return list of Python unit test files for MODULENAME in MODULESDIR.

    Unit test files are the ``*_tests.py`` files found anywhere below
    MODULESDIR/MODULENAME/, except those ending with
    ``_regression_tests.py``.  The returned list is sorted.

    The tree is walked in-process with os.walk() instead of running
    ``find`` through a shell, so directory names with spaces or shell
    metacharacters work and no pipes are left open.
    """
    out = []
    moduledir = "%s/%s/" % (modulesdir, modulename)
    for dirpath, dummy_dirnames, filenames in os.walk(moduledir):
        for filename in filenames:
            if not filename.endswith("_tests.py"):
                continue
            # regression tests are reported separately:
            if filename.endswith("_regression_tests.py"):
                continue
            out.append(os.path.join(dirpath, filename))
    # sort so that reports do not depend on directory listing order:
    out.sort()
    return out
def get_list_of_python_regression_test_files(modulesdir, modulename):
    """Return list of Python regression test files for MODULENAME in
    MODULESDIR.

    Regression test files are the ``*_regression_tests.py`` files found
    anywhere below MODULESDIR/MODULENAME/.  The returned list is sorted.

    The tree is walked in-process with os.walk() instead of running
    ``find`` through a shell, so directory names with spaces or shell
    metacharacters work and no pipes are left open.
    """
    out = []
    moduledir = "%s/%s/" % (modulesdir, modulename)
    for dirpath, dummy_dirnames, filenames in os.walk(moduledir):
        for filename in filenames:
            if filename.endswith("_regression_tests.py"):
                out.append(os.path.join(dirpath, filename))
    # sort so that reports do not depend on directory listing order:
    out.sort()
    return out
def get_nb_lines_in_file(filename):
    """Return number of lines in FILENAME.

    All lines are counted, including comments and blank lines.  The
    file is closed explicitly instead of relying on garbage collection.
    Errors from opening or reading the file are propagated.
    """
    source_file = open(filename)
    try:
        return len(source_file.readlines())
    finally:
        source_file.close()
def get_nb_test_cases_in_file(filename):
"""Return number of test cases in FILENAME."""
(dummy, pipe, dummy) = os.popen3("grep ' def test' %s" % filename)
return len(pipe.readlines())
def get_pylint_results(filename):
    """
    Run pylint and return the tuple of (nb_missing_docstrings, score,
    nb_msg_convention, nb_msg_refactor, nb_msg_warning, nb_msg_error,
    nb_msg_fatal) for FILENAME.

    The score is returned as a float.  If the score cannot be detected
    in the pylint output (which includes the case that pylint could not
    be started at all), print an error and report the score as
    -999999999.0; the other tuple members are still the counts found in
    whatever output was produced.

    pylint is started without a shell and both of its output streams
    are drained, so filenames with spaces or shell metacharacters work
    and the child cannot block on an unread stderr pipe.
    """
    try:
        pylint_process = subprocess.Popen(["pylint", filename],
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          universal_newlines=True)
        # communicate() reads stdout and stderr to the end and waits
        # for the process; only stdout is analyzed:
        pylint_output = pylint_process.communicate()[0]
    except OSError:
        # pylint could not be executed; treat as "no output" so that
        # the missing score is reported below
        pylint_output = ""
    # detect number of missing docstrings:
    nb_missing_docstrings = pylint_output.count(": Missing docstring")
    # detect pylint score:
    pylint_score = -999999999
    pylint_score_matched = re.search(r'Your code has been rated at ([0-9\.\-]+)\/10',
                                     pylint_output)
    if pylint_score_matched:
        pylint_score = pylint_score_matched.group(1)
    else:
        print("ERROR: cannot detect pylint score for %s" % filename)
    # detect pylint messages (one per output line starting with the
    # message category letter):
    nb_msg_convention = pylint_output.count("\nC:")
    nb_msg_refactor = pylint_output.count("\nR:")
    nb_msg_warning = pylint_output.count("\nW:")
    nb_msg_error = pylint_output.count("\nE:")
    nb_msg_fatal = pylint_output.count("\nF:")
    # return results:
    if verbose >= 9:
        print("get_pylint_results(%s) = (%d, %s, %s, %s, %s, %s, %s)" % \
              (filename, nb_missing_docstrings, pylint_score,
               nb_msg_convention, nb_msg_refactor, nb_msg_warning,
               nb_msg_error, nb_msg_fatal))
    return (nb_missing_docstrings, float(pylint_score),
            nb_msg_convention, nb_msg_refactor, nb_msg_warning,
            nb_msg_error, nb_msg_fatal)
def get_nb_pychecker_warnings(filename):
    """Run pychecker for FILENAME and return the number of warnings.
    Do not return warnings from imported files, only warnings found
    inside FILENAME.

    A warning is counted for every pychecker output line that contains
    the base name of FILENAME followed by a colon.  If pychecker cannot
    be started, 0 is returned.

    pychecker is started without a shell and both of its output streams
    are drained, so filenames with spaces or shell metacharacters work
    and the child cannot block on an unread stderr pipe.
    """
    nb_warnings_found = 0
    filename_to_watch_for = os.path.basename(filename) # pychecker strips leading path
    try:
        pychecker_process = subprocess.Popen(["pychecker", "--limit=10000",
                                              filename],
                                             stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE,
                                             universal_newlines=True)
        # communicate() reads stdout and stderr to the end and waits
        # for the process; only stdout is analyzed:
        pychecker_output = pychecker_process.communicate()[0]
    except OSError:
        # pychecker could not be executed; nothing to count
        pychecker_output = ""
    for line in pychecker_output.splitlines():
        if (filename_to_watch_for + ":") in line:
            nb_warnings_found += 1
    if verbose >= 9:
        print("get_nb_pychecker_warnings(%s) = %s" % (filename,
                                                      nb_warnings_found))
    return nb_warnings_found
def calculate_module_kwalitee(modulesdir, modulename):
    """Run kwalitee tests for MODULENAME in MODULESDIR and return the
    kwalitee dict.

    The dict has the keys modulename, nb_loc, nb_unit_tests,
    nb_regression_tests, nb_missing_docstrings, nb_pychecker_warnings,
    avg_pylint_score, nb_msg_convention, nb_msg_refactor,
    nb_msg_warning, nb_msg_error and nb_msg_fatal.

    Lines of code are counted over the code files only, while pylint
    and pychecker are run over code, unit test and regression test
    files alike (after washing out files unsuitable for pylinting).
    The average pylint score is 0.0 when there is nothing to pylint.
    """
    code_files = get_list_of_python_code_files(modulesdir, modulename)
    unit_test_files = get_list_of_python_unit_test_files(modulesdir,
                                                         modulename)
    regression_test_files = get_list_of_python_regression_test_files(
        modulesdir, modulename)
    # 1 - calculate LOC:
    nb_loc = sum([get_nb_lines_in_file(path) for path in code_files])
    # 2 - calculate # unit tests:
    nb_unit_tests = sum([get_nb_test_cases_in_file(path)
                         for path in unit_test_files])
    # 3 - calculate # regression tests:
    nb_regression_tests = sum([get_nb_test_cases_in_file(path)
                               for path in regression_test_files])
    # 4 - calculate pylint results and score:
    lintable_files = wash_list_of_python_files_for_pylinting(
        code_files + unit_test_files + regression_test_files)
    result = {'modulename': modulename,
              'nb_loc': nb_loc,
              'nb_unit_tests': nb_unit_tests,
              'nb_regression_tests': nb_regression_tests,
              'nb_missing_docstrings': 0,
              'nb_msg_convention': 0,
              'nb_msg_refactor': 0,
              'nb_msg_warning': 0,
              'nb_msg_error': 0,
              'nb_msg_fatal': 0,
              }
    pylint_score_sum = 0.0
    for path in lintable_files:
        (missing_docstrings, pylint_score, msg_convention, msg_refactor,
         msg_warning, msg_error, msg_fatal) = get_pylint_results(path)
        result['nb_missing_docstrings'] += missing_docstrings
        pylint_score_sum += pylint_score
        result['nb_msg_convention'] += msg_convention
        result['nb_msg_refactor'] += msg_refactor
        result['nb_msg_warning'] += msg_warning
        result['nb_msg_error'] += msg_error
        result['nb_msg_fatal'] += msg_fatal
    if len(lintable_files) == 0:
        # nothing was pylinted, so there is no average to compute
        result['avg_pylint_score'] = 0.0
    else:
        result['avg_pylint_score'] = pylint_score_sum / len(lintable_files)
    # 5 - calculate number of pychecker warnings:
    result['nb_pychecker_warnings'] = sum([get_nb_pychecker_warnings(path)
                                           for path in lintable_files])
    # 6 - return kwalitee dict:
    return result
def get_invenio_modulenames(dirname="."):
    """Return the alphabetically sorted list of all CDS Invenio source
    modules (directories) found directly in DIRNAME.

    Skipped are the CVS directory, entries that are not directories,
    and the webhelp and webstat modules, which are not in Python.
    """
    skipped_names = ("CVS", "webhelp", "webstat")
    modulenames = []
    for entry in os.listdir(dirname):
        if entry in skipped_names:
            continue
        # keep directories only:
        if not os.path.isdir(dirname + "/" + entry):
            continue
        modulenames.append(entry)
    # sort alphabetically:
    modulenames.sort()
    return modulenames
def generate_kwalitee_stats_for_all_modules(modulesdir):
    """Run kwalitee estimation for each CDS Invenio module and print
    the results on stdout.

    MODULESDIR must contain the module directories; if no ``websearch``
    module is found there, an error and the usage info are printed and
    the program exits with status 1.  Printed are one row per module, a
    TOTAL row and a legend.  The TOTAL pylint score is the mean of the
    per-module average scores.
    """
    summed_keys = ['nb_loc',
                   'nb_unit_tests',
                   'nb_regression_tests',
                   'nb_pychecker_warnings',
                   'nb_missing_docstrings',
                   'avg_pylint_score',
                   'nb_msg_convention',
                   'nb_msg_refactor',
                   'nb_msg_warning',
                   'nb_msg_error',
                   'nb_msg_fatal']
    header_format = "%(modulename)13s %(nb_loc)8s %(nb_unit)6s %(nb_regression)6s %(nb_tests_per_1k_loc)8s %(nb_missing_docstrings)8s %(nb_pychecker_warnings)12s %(avg_pylint_score)11s %(pylint_details)s"
    row_format = "%(modulename)13s %(nb_loc)8d %(nb_unit)6d %(nb_regression)6d %(nb_tests_per_1k_loc)8.2f %(nb_missing_docstrings)8d %(nb_pychecker_warnings)12.3f %(avg_pylint_score)8.2f/10 %(pylint_details)s"
    separator = " ".join([" ", "-"*11, "-"*8, "-"*6, "-"*6, "-"*8,
                          "-"*8, "-"*12, "-"*11, "-"*25])

    def per_1k_loc(count, nb_loc):
        """Return COUNT per 1000 lines of code, or 0 if there is no
        code or the ratio is zero."""
        if nb_loc != 0:
            ratio = (count + 0.0) / nb_loc * 1000.0
            if ratio:
                return ratio
        return 0

    def format_row(stats, avg_pylint_score):
        """Return the table row for the kwalitee dict STATS, showing
        AVG_PYLINT_SCORE as its pylint score."""
        pylint_details = "%3dF %3dE %3dW %3dR %3dC" % (
            stats['nb_msg_fatal'],
            stats['nb_msg_error'],
            stats['nb_msg_warning'],
            stats['nb_msg_refactor'],
            stats['nb_msg_convention'],)
        return row_format % {
            'modulename': stats['modulename'],
            'nb_loc': stats['nb_loc'],
            'nb_unit': stats['nb_unit_tests'],
            'nb_regression': stats['nb_regression_tests'],
            'nb_tests_per_1k_loc': per_1k_loc(
                stats['nb_unit_tests'] + stats['nb_regression_tests'],
                stats['nb_loc']),
            'nb_missing_docstrings': stats['nb_missing_docstrings'],
            'nb_pychecker_warnings': per_1k_loc(
                stats['nb_pychecker_warnings'], stats['nb_loc']),
            'avg_pylint_score': avg_pylint_score,
            'pylint_details': pylint_details,
            }

    # init kwalitee measurement structure:
    totals = {'modulename': 'TOTAL'}
    for key in summed_keys:
        totals[key] = 0
    kwalitee = {'TOTAL': totals}
    # detect CDS Invenio modules:
    modulenames = get_invenio_modulenames(modulesdir)
    if "websearch" not in modulenames:
        print("Cannot find CDS Invenio modules in %s." % modulesdir)
        usage()
        sys.exit(1)
    # print header
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print("="*80)
    print("CDS Invenio Python Code Kwalitee Check %41s" % timestamp)
    print("="*80)
    print("")
    print(header_format % {'modulename': 'Module',
                           'nb_loc': '#LOC',
                           'nb_unit': '#UnitT',
                           'nb_regression': '#RegrT',
                           'nb_tests_per_1k_loc': '#T/1kLOC',
                           'nb_missing_docstrings': '#MissDoc',
                           'nb_pychecker_warnings': '#PyChk/1kSRC',
                           'avg_pylint_score': 'PyLintScore',
                           'pylint_details': 'PyLintDetails'})
    print(separator)
    for modulename in modulenames:
        # calculate kwalitee for this modulename:
        module_stats = calculate_module_kwalitee(modulesdir, modulename)
        kwalitee[modulename] = module_stats
        # add it to global results:
        for key in summed_keys:
            totals[key] += module_stats[key]
        # print results for this modulename:
        print(format_row(module_stats, module_stats['avg_pylint_score']))
    # print totals; every kwalitee key except 'TOTAL' is one module:
    print(separator)
    print(format_row(totals,
                     totals['avg_pylint_score'] / (len(kwalitee.keys()) - 1)))
    # print legend:
    print("""
Legend:
#LOC = number of lines of code (excl. test files, incl. comments/blanks)
#UnitT = number of unit test cases
#RegrT = number of regression test cases
#T/1kLOC = number of tests per 1k lines of code [desirable: > 10]
#MissDoc = number of missing docstrings [desirable: 0]
#PyChk/1kSRC = number of PyChecker warnings per 1k sources [desirable: 0]
PyLintScore = average PyLint score [desirable: > 9.00]
PyLintDetails = number of PyLint messages (Fatal, Error, Warning, Refactor, Convention)
""")
    return
def generate_kwalitee_stats_for_some_files(filenames):
    """Run kwalitee checks on FILENAMES and print results.

    FILENAMES are first washed of files unsuitable for pylinting.  For
    each remaining file one row is printed with its number of lines,
    number of missing docstrings, number of pychecker warnings, pylint
    score and pylint message counts, followed by a TOTAL row and a
    legend.

    The TOTAL pylint score is the mean over all checked files.  It is
    shown as 0.00 when no file remained after washing (instead of
    failing with ZeroDivisionError), and a file given more than once is
    counted as many times in the divisor as in the sum.
    """
    summed_keys = ['nb_loc',
                   'nb_pychecker_warnings',
                   'nb_missing_docstrings',
                   'avg_pylint_score',
                   'nb_msg_convention',
                   'nb_msg_refactor',
                   'nb_msg_warning',
                   'nb_msg_error',
                   'nb_msg_fatal']
    header_format = "%(filename)50s %(nb_loc)8s %(nb_missing_docstrings)8s %(nb_pychecker_warnings)6s %(avg_pylint_score)11s %(pylint_details)s"
    row_format = "%(filename)50s %(nb_loc)8d %(nb_missing_docstrings)8d %(nb_pychecker_warnings)6d %(avg_pylint_score)8.2f/10 %(pylint_details)s"
    separator = " ".join([" ", "-"*48, "-"*8, "-"*8, "-"*6, "-"*11, "-"*25])

    def format_row(label, stats, avg_pylint_score):
        """Return the table row labelled LABEL for the kwalitee dict
        STATS, showing AVG_PYLINT_SCORE as its pylint score."""
        pylint_details = "%3dF %3dE %3dW %3dR %3dC" % (
            stats['nb_msg_fatal'],
            stats['nb_msg_error'],
            stats['nb_msg_warning'],
            stats['nb_msg_refactor'],
            stats['nb_msg_convention'],)
        return row_format % {
            'filename': label,
            'nb_loc': stats['nb_loc'],
            'nb_missing_docstrings': stats['nb_missing_docstrings'],
            'nb_pychecker_warnings': stats['nb_pychecker_warnings'],
            'avg_pylint_score': avg_pylint_score,
            'pylint_details': pylint_details,
            }

    # init kwalitee measurement structure:
    totals = {}
    for key in summed_keys:
        totals[key] = 0
    nb_files_checked = 0
    # print header:
    print(header_format % {'filename': 'File',
                           'nb_loc': '#LOC',
                           'nb_missing_docstrings': '#MissDoc',
                           'nb_pychecker_warnings': '#PyChk',
                           'avg_pylint_score': 'PyLintScore',
                           'pylint_details': 'PyLintDetails',
                           })
    print(separator)
    files_for_pylinting = wash_list_of_python_files_for_pylinting(filenames)
    for filename in files_for_pylinting:
        # calculate the kwalitee of the file:
        file_stats = {}
        file_stats['nb_loc'] = get_nb_lines_in_file(filename)
        file_stats['nb_pychecker_warnings'] = get_nb_pychecker_warnings(filename)
        (file_stats['nb_missing_docstrings'], file_stats['avg_pylint_score'],
         file_stats['nb_msg_convention'], file_stats['nb_msg_refactor'],
         file_stats['nb_msg_warning'], file_stats['nb_msg_error'],
         file_stats['nb_msg_fatal']) = get_pylint_results(filename)
        # add it to the total results:
        for key in summed_keys:
            totals[key] += file_stats[key]
        nb_files_checked += 1
        # print results for this filename:
        print(format_row(filename, file_stats,
                         file_stats['avg_pylint_score']))
    # print totals; divide by the number of files actually checked so
    # that an empty run cannot divide by zero:
    if nb_files_checked:
        total_avg_pylint_score = totals['avg_pylint_score'] / nb_files_checked
    else:
        total_avg_pylint_score = 0.0
    print(separator)
    print(format_row('TOTAL', totals, total_avg_pylint_score))
    # print legend:
    print("""
Legend:
#LOC = number of lines of code (incl. comments/blanks)
#MissDoc = number of missing docstrings [desirable: 0]
#PyChk = number of PyChecker warnings [desirable: 0]
PyLintScore = PyLint score [desirable: > 9.00]
PyLintDetails = number of PyLint messages (Fatal, Error, Warning, Refactor, Convention)
""")
    return
def usage():
    """Print usage info (synopsis, description and two example
    invocations) on stdout."""
    usage_lines = [
        "Usage: python kwalitee.py <topsrcdir | file1.py file2.py ...>",
        "Description: check the kwalitee of the CDS Invenio Python code.",
        "Examples:",
        "$ python kwalitee.py ~/src/cds-invenio/",
        "$ python kwalitee.py ../../websearch/lib/*.py",
    ]
    print("\n".join(usage_lines))
def main():
    """Analyze CLI options and invoke appropriate actions.

    With no argument, print usage and exit with status 1.  With a first
    argument starting with ``-h`` or ``--help``, print usage and exit
    with status 0.  With a directory as first argument, run the
    all-modules report on its ``modules`` subdirectory.  With an
    existing ``.py`` file as first argument, run the per-file report on
    all arguments.  Anything else is an error.

    Every error path prints a message plus the usage info and exits
    with status 1, including the case of a directory that has no
    ``modules`` subdirectory.
    """
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    first_argument = sys.argv[1]
    if first_argument.startswith("-h") or first_argument.startswith("--help"):
        usage()
        sys.exit(0)
    elif os.path.isdir(first_argument):
        modulesdir = first_argument + "/modules"
        if os.path.isdir(modulesdir):
            generate_kwalitee_stats_for_all_modules(modulesdir)
        else:
            print("ERROR: %s does not seem to be CDS Invenio top source directory." % first_argument)
            usage()
            # this is a failure, so do not report success to the caller
            sys.exit(1)
    elif os.path.isfile(first_argument) and first_argument.endswith(".py"):
        generate_kwalitee_stats_for_some_files(sys.argv[1:])
    else:
        print("ERROR: don't know what to do with %s." % first_argument)
        usage()
        sys.exit(1)
    return
def test():
    """Test stuff: print the pylint results tuple obtained for one
    hard-coded file of an installed CDS Invenio."""
    sample_file = "/opt/cds-invenio/lib/python/invenio/bibrecord.py"
    print(get_pylint_results(sample_file))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    # the ad-hoc test() helper is kept disabled; main() is the entry point
    #test()
    main()

Event Timeline