def mkdate(datestring):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Used as the argparse ``type=`` callable of the release date option; a
    string that does not match the format makes ``strptime`` raise
    ``ValueError``.
    """
    return dt.datetime.strptime(datestring, '%Y-%m-%d').date()


def _build_parser():
    """Create the command line parser of the licenser script.

    Every short and long spelling is passed to ``add_argument`` as its own
    option string.  The previous code passed a single string such as
    ``"-i,--input"``, which registers one flag with that literal name, so
    the long spelling shown in the help could not be used.
    """
    parser = argparse.ArgumentParser(prog='licenser', add_help=True)

    parser.add_argument("-i", "--input",
                        help="Filename to check",
                        dest="filename",
                        default=None)
    parser.add_argument("-f", "--file_list",
                        help="File containing a list of files",
                        dest="file_list",
                        default=None)
    parser.add_argument("--repo",
                        help="Repository to consider",
                        dest="repo",
                        default=None)
    parser.add_argument("-p", "--path",
                        help="Folder where to find the files",
                        dest="path",
                        default="")
    parser.add_argument("-s", "--skip-first",
                        help="Skip the first files when using the -f option",
                        dest="skip_first",
                        type=int,
                        default=0)
    parser.add_argument("-v", "--versioning-backend",
                        dest="vc_backend",
                        help="Backend used as versioning system (svn, git, none)")
    parser.add_argument("configuration_file",
                        help="File containing the configuration, .csv or .db (sqlite)")
    parser.add_argument("-r", "--release-date",
                        help="Date at which the release is prepared",
                        dest='release_date',
                        type=mkdate,
                        default=dt.datetime.now().date())
    parser.add_argument("-a", "--no-author-check",
                        help="Do not check the author list",
                        dest="no_author_check",
                        action='store_true',
                        default=False)
    parser.add_argument("-b", "--no-brief-check",
                        help="Do not check the brief",
                        dest="no_brief_check",
                        action='store_true',
                        default=False)
    parser.add_argument("--ignore-threshold",
                        help="Limit of number of line to consider an author from the VC system",
                        dest="ignore_threshold",
                        type=int,
                        default=0)
    parser.add_argument("--ignore-filled-briefs",
                        help="Do not check the brief if they are not empty",
                        dest="ignore_filled_briefs",
                        action='store_true',
                        default=False)
    parser.add_argument("--dry-run",
                        help="Do nothing for real",
                        dest='dry_run',
                        action='store_true',
                        default=False)
    parser.add_argument("-l", "--force-license",
                        help="Force a give license",
                        dest="force_license",
                        default=None)
    parser.add_argument("--force",
                        help="Force to update the header even it is considered up-to-date",
                        dest="force",
                        action='store_true',
                        default=False)
    parser.add_argument("--yes",
                        help="Answers yes to keep author and brief questions",
                        dest="yes",
                        action='store_true',
                        default=False)
    return parser


def main(argv=None):
    """Run the licenser on one file or on a list of files.

    :param argv: list of command line arguments; ``None`` lets argparse read
                 ``sys.argv[1:]``.

    Exits with status -1 when neither or both of the filename and the file
    list options are given.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if (args.filename is None) and (args.file_list is None):
        print("You should at least give a filename or a file_list")
        parser.print_help()
        sys.exit(-1)

    if (args.filename is not None) and (args.file_list is not None):
        print("You should give only on of the option filename or file_list")
        parser.print_help()
        sys.exit(-1)

    if args.filename is not None:
        file_list = [args.filename]
    else:
        with open(args.file_list, "r") as fh:
            stripped_lines = (line.strip() for line in fh)
            # blank lines of the list file are not file names
            file_list = [name for name in stripped_lines if name]

    db = pylic.LicenserDB(args.configuration_file)

    # vars() returns the namespace's own dictionary: work on a copy so that
    # dropping "filename" does not remove the attribute from ``args``
    transformer_kwargs = dict(vars(args))
    transformer_kwargs.pop("filename", None)

    if not args.path == "":
        path = args.path.rstrip("/") + "/"
    else:
        path = ""

    total = len(file_list)
    for count, name in enumerate(file_list, start=1):
        print("[{0:>3}%]({2:>3}/{3}) {1}".format(int(float(count) / total * 100),
                                                  pylic.print_colored(name, attrs=['bold']),
                                                  count, total), end="")
        if count <= args.skip_first:
            print(" ({0})".format(pylic.print_colored("skipped", "red", attrs=['bold'])))
            continue
        elif args.force:
            print(" ({0})".format(pylic.print_colored("forced", "red", attrs=['bold'])))
        else:
            print("")

        ft = pylic.FileTransformer(path + name, db, **transformer_kwargs)
        ft.replace_file(args.dry_run)


if __name__ == "__main__":
    main()
def __split_header(self):
    """Split ``self._file_content`` into ``self._header`` and ``self._body``.

    * Content without the word "Copyright" is considered header-less: the
      header is empty and the whole content is the body.
    * C++ and SWIG files: the header is everything before the first ``*/``
      (blank lines removed), the body is everything after it.
    * CMake files: the header runs up to and including the second line
      starting with ``#====``; a warning is recorded if it holds a blank line.
    * Any other file type: handled like a header-less file, so that
      ``self._body`` is always defined.

    :raises ValueError: if the closing ``*/`` or the second ``#====``
                        separator cannot be found.
    """
    content = self._file_content

    if "Copyright" not in content:
        self._header = ""
        self._body = content
        print("This file as no header or a really badly formatted one!!")
        return

    if self._file_type in (CPP_SOURCES, SWIG_FILES):
        # plain substring split: no regular expression (and no escaping) needed
        header, closer, body = content.partition("*/")
        if not closer:
            raise ValueError("the header comment is never closed by '*/'")
        self._header = '\n'.join(line for line in header.split('\n') if line.strip())
        self._body = body
    elif self._file_type == CMAKE_FILES:
        lines = content.split('\n')
        nb_sep = 0
        nb_whitelines = 0
        header_end = 0
        while nb_sep < 2:
            if header_end >= len(lines):
                # previously an IndexError on the exhausted list
                raise ValueError("the header is not closed by a second '#====' line")
            stripped = lines[header_end].strip()
            if stripped.startswith('#===='):
                nb_sep += 1
            elif not stripped:
                nb_whitelines += 1
            header_end += 1

        if nb_whitelines > 0:
            self._warn_list.append("The header has a blank line or is ill-formed")

        self._header = "\n".join(lines[:header_end])
        self._body = "\n".join(lines[header_end:])
    else:
        # no header layout is defined for this file type: keep everything as body
        self._header = ""
        self._body = content
copyright_txt="", creation_date=None, last_modification_date=None, date_style=CREATION_MODIFICATION, date_format="%a %b %d %H:%M:%S %Y", brief=None): if type(date_style) == str: # noinspection PyTypeChecker date_style = self.__date_style_conv[date_style] if self._file_type in [CPP_SOURCES, SWIG_FILES]: starter = "/**" prefix = " *" closer = " */" force_closer = True elif self._file_type == CMAKE_FILES: starter = "#===============================================================================" prefix = "#" closer = "#===============================================================================" force_closer = False else: raise IOError("file type not recognized") if real_authors is None: real_authors = self._authors new_lines = list() new_lines.append(starter) # file name new_lines.append(prefix + " @file " + os.path.basename(self._filename)) new_lines.append(prefix) # authors authors = list(real_authors) authors.sort(key=lambda x: x.last_name + ' ' + x.first_name) for author in authors: - new_lines.append('{0} @author {1.first_name} {1.last_name} <{1.e_mail}>'.format(prefix, author)) + new_lines.append('{0} @author {1}'.format(prefix, author)) new_lines.append(prefix) # date if date_style == CREATION_MODIFICATION and creation_date is not None and last_modification_date is not None: new_lines.append( "{0} @date creation: {1}".format(prefix, creation_date.strftime(date_format))) if last_modification_date != creation_date: new_lines.append("{0} @date last modification: {1}".format(prefix, last_modification_date.strftime(date_format))) elif date_style == LAST_MODIFICATION and last_modification_date is not None: new_lines.append("{0} @date {1}".format( prefix, last_modification_date.strftime(date_format))) elif date_style == CREATION and creation_date is not None: new_lines.append("{0} @date {1}".format( prefix, creation_date.strftime(date_format))) elif date_style == LAST_MODIFICATION and last_modification_date is not None: new_lines.append("{0} @date {1}".format( 
def replace_file(self, new_header=None):
    """Rewrite the file as ``header``, one empty line, then the body.

    :param new_header: header text to write; when ``None`` the header
                       currently stored in ``self._header`` is written back.

    Leading lines of the body that are empty or whitespace-only are dropped
    before the file is written.
    """
    header = self._header if new_header is None else new_header

    body_lines = self._body.split("\n")
    first_kept = 0
    # skip the blank lines that start the body
    while first_kept < len(body_lines) and not body_lines[first_kept].strip():
        first_kept += 1

    text = header + '\n\n' + '\n'.join(body_lines[first_kept:])
    with open(self._filename, "w") as fh:
        fh.write(text)
import file_info as fi from . import print_colored from . import version_info as vc import os from pygments import highlight from pygments.lexers.diff import DiffLexer from pygments.formatters.terminal256 import Terminal256Formatter from pygments.formatters.terminal import TerminalFormatter @export class FileTransformer(object): """ Class that reformat the headers """ __keep_authors = None _brief = None __ignore = False _new_header = None def __init__(self, filename, db, release_date=dt.datetime.now().date(), no_author_check=False, no_brief_check=False, force=False, **kwargs): self.__filename = filename self.__release_date = release_date self.__db = db self.__no_brief_check = no_brief_check self.__no_author_check = no_author_check + # Check the authors + self.__author_db = adb.AuthorDB(db) + if "vc_backend" in kwargs and kwargs["vc_backend"] is not None: vc_back = kwargs["vc_backend"] else: vc_back = db.versioning_backend if vc_back != "none": self._date_style = self.__db.get_config('date_style') self.__repo = self.__db.get_config('repo') self._vc_info = vc.VersionInfo(self.__repo, self.__filename, self.__db.get_list_of_ignore_emails(), backend=vc_back, rev_to=release_date) self.__name = self._vc_info.name self.__filename = self.__repo + '/' + self.__name self._creation_date = self._vc_info.creation_date self._last_modif = self._vc_info.last_modification_date else: self._creation_date = None self._last_modif = None self._date_style = None self._vc_info = None - # Check the authors - self.__author_db = adb.AuthorDB(db) - try: self._file = fi.FileInfo(self.__filename, self.__author_db) except NotImplementedError: print("File {0} ignored due to {1}".format(self._vc_info.name, print_colored('unknown type', 'red', attrs=['bold']))) self.__ignore = True return rev_from = None license_id = None - file_maj = None self.__oldest_name = "" if vc_back != "none": self.__oldest_name = self._vc_info.oldest_name file_maj = self.__db.find_file(self.__oldest_name) if file_maj is not 
None: rev_from, license_id = file_maj if not force and rev_from.date() >= release_date: print("File {0} ignored due to recent modifications ({1})".format(self._vc_info.name, print_colored( rev_from.strftime( "%Y-%m-%d"), 'red', attrs=['bold']))) self.__ignore = True return if not self.__no_author_check: self._vc_authors = self._vc_info.authors self._vc_info.populate(self.__author_db, rev_to=self.__release_date, rev_from=rev_from) self._vc_authors = self._vc_info.authors self._f_authors = self._file.authors self._real_authors = self.__compare_authors(self._f_authors, self._vc_authors, file_is_good=False, **kwargs) if self._real_authors - self._f_authors: print("Added authors:\n{0}".format( "\n".join([" @author {0}".format(author) for author in self._real_authors - self._f_authors]))) if self._f_authors - self._real_authors: print("Removed authors:\n{0}".format( "\n".join([" @author {0}".format(author) for author in self._f_authors - self._real_authors]))) else: self._real_authors = self._file.authors # Check the brief if not self.__no_brief_check: self.__check_brief(**kwargs) else: self._brief = self._file.get_brief() if "force_license" in kwargs and kwargs['force_license'] is not None: license_id = kwargs["force_license"] # Getting the license content self.__copyright_base = cdb.CopyrightDB(db) if license_id is None: copyright_policy = self.__db.get_config('copyright_policy') if copyright_policy == 'creation_date': cdate = self._creation_date.date() elif copyright_policy == 'release_date': cdate = self.__release_date else: cdate = None self._lic = self.__copyright_base.find_by_date(cdate) else: self._lic = self.__copyright_base.find_by_id(license_id) if self._lic is None: raise ("The license with the id {0} is not defined".format(license_id)) date_format = self.__db.get_config('date_format') # Generates the new header file self._new_header = self._file.generate_header( real_authors=self._real_authors, copyright_txt=self._lic.text, date_format=date_format, 
date_style=self._date_style, last_modification_date=self._last_modif, creation_date=self._creation_date, brief=self._brief) # noinspection PyPep8Naming,PyUnusedLocal def __compare_authors(self, file_known_tmp, vc_known, file_is_good=False, - ignore_threshold=0, **kwargs): + ignore_threshold=0, yes=False, **kwargs): file_known = file_known_tmp if len(file_known) == 0 and len(vc_known) == 1: file_known = vc_known if not file_is_good: last_keep = None if len(self.__oldest_name) != 0: last_keep = self.__db.get_last_keep_authors(self.__oldest_name, self.__author_db) if last_keep is None: last_keep = [] for author in last_keep: print("Keeping @author {0} ({1})".format(author, print_colored("previously validated", 'red', attrs=['bold']))) file_only = file_known - vc_known - set(last_keep) to_remove = set() if file_only: KEEP = 0 REMOVE = 1 for author in file_only: - answer = licenser_ask_question( - "Do you want to remove" + - " @author {0}".format(author), - {'k': KEEP, 'r': REMOVE}, 'k') - if answer == REMOVE: - to_remove.add(author) + if not yes: + answer = licenser_ask_question("Do you want to remove" + + " @author {0}".format(author), + {'k': KEEP, 'r': REMOVE}, 'k') + if answer == REMOVE: + to_remove.add(author) + else: + print("Do you want to remove @author {0}? 
# noinspection PyUnusedLocal
def __check_brief(self, ignore_filled_briefs=False, yes=False, **kwargs):
    """Validate the brief of the file and store the result in ``self._brief``.

    :param ignore_filled_briefs: when true a non-empty brief is kept without
                                 any question.
    :param yes: when true the "keep this brief" question is only printed and
                the brief is kept.
    :param kwargs: ignored; lets the caller forward all its options.

    A non-empty brief equal to the one returned by the database for this
    file is reported as previously validated.  If the brief is empty, or the
    user refuses it, a new one is read from standard input line by line
    until an empty line is entered.
    """
    self._brief = self._file.get_brief()

    if self._brief and not ignore_filled_briefs:
        if len(self.__oldest_name) != 0:
            old_brief = self.__db.get_last_brief(self.__oldest_name)
        else:
            old_brief = None

        if old_brief is not None and old_brief == self._brief:
            print('Brief: {0} ({1})'.format(self._brief,
                                            print_colored("previously validated", 'red', attrs=['bold'])))
        elif yes:
            # non-interactive mode: show the question with its default answer
            print("\"{0}\"\nDo you want to keep this brief ? ({1}/n)".format(self._brief,
                                                                              print_colored('Y',
                                                                                            'red',
                                                                                            attrs=['bold'])))
        else:
            keep_brief = licenser_ask_question("\"{0}\"\nDo you want to keep this brief".format(self._brief))
            if not keep_brief:
                # an empty string (not False) keeps the attribute a string
                self._brief = ""

    if not self._brief:
        res = input("Please type the brief for file: ")
        brief = []
        # compare by value: `is not ""` tested object identity with a literal
        while res != "":
            brief.append(res)
            res = input("> ")
        self._brief = "\n".join(brief)
@export
class VersionInfo:
    """Generic class handling the communication with the versioning system"""

    # Class-level defaults, kept for subclasses that read them without
    # calling this class' __init__.  They are shared by every instance, so
    # __init__ gives each instance its own containers.
    _authors = set()
    _author_list = dict()
    _modification_dates = []
    _name = ""
    _repo = ""
    _filename = ""
    _revisions = []
    _names = dict()

    def __new__(cls, repo, filename, ignore_list, backend=None, **kwargs):
        """
        Factory constructor depending on the chosen backend

        :raises ValueError: if ``backend`` is neither 'svn' nor 'git'
        """
        if backend == 'svn':
            from .vcs import svn
            return super().__new__(svn.SVNInfo)
        elif backend == 'git':
            from .vcs import git
            return super().__new__(git.GITInfo)
        else:
            # ValueError is still caught by callers handling Exception
            raise ValueError("{0} not a known backend".format(backend))

    # noinspection PyUnusedLocal
    def __init__(self, repo, filename, ignore_list, **kwargs):
        """Store the expanded repository and file paths.

        Also creates the per-instance containers.  Containers a subclass
        already assigned on the instance are left untouched.
        NOTE(review): assumes backends do not fill the class-level
        containers in place before calling this __init__ - confirm in vcs/.
        """
        self._repo = os.path.expanduser(repo)
        self._filename = os.path.expanduser(filename)

        state = vars(self)
        state.setdefault('_authors', set())
        state.setdefault('_author_list', dict())
        state.setdefault('_modification_dates', [])
        state.setdefault('_revisions', [])
        state.setdefault('_names', dict())

    @property
    def creation_date(self):
        """Earliest known modification date, or None if there is none."""
        return min(self._modification_dates, default=None)

    @property
    def last_modification_date(self):
        """Latest known modification date, or None if there is none."""
        return max(self._modification_dates, default=None)

    @property
    def authors(self):
        """Set of authors stored for the file."""
        return self._authors

    def number_of_modifications(self, author):
        """Number of entries recorded for ``author`` in the author list."""
        return len(self._author_list[author])

    def modifications_by_author(self, author):
        """Collect the modifications recorded for ``author``.

        :return: ``(res, stats)`` where ``res`` holds one
                 ``[info, modif, modif lines, stats of the revision]`` entry
                 per revision and ``stats`` sums the per-revision stats by key.
        :raises KeyError: if ``author`` is not in the author list.
        """
        res = []
        stats = {}
        for d, r, msg in self._author_list[author]:
            date = d.strftime("%d-%m-%Y")
            info = print_colored("@ {0} rev {1} msg {2}".format(date, r, msg), 'blue', attrs=['bold'])
            modif, _stats = self.get_modifications(r)
            for k, v in _stats.items():
                if k in stats:
                    stats[k] += v
                else:
                    stats[k] = v
            res.append([info, modif, modif.split("\n"), _stats])
        return res, stats

    @property
    def name(self):
        """Name of the file as stored in ``_name``."""
        return self._name

    @property
    def oldest_name(self):
        """Oldest known name of the file; this base class returns ``_name``."""
        return self._name

    def get_modifications(self, revision):
        """Return ``(modif, stats)`` for ``revision``; backends must override.

        The base implementation used to return None, which failed later when
        modifications_by_author unpacked the result.
        """
        raise NotImplementedError("get_modifications has to be implemented by the backend")