diff --git a/bin/licenser.py b/bin/licenser.py index 8758306..563c504 100755 --- a/bin/licenser.py +++ b/bin/licenser.py @@ -1,141 +1,140 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" import argparse import datetime as dt import sys import pylicenser as pylic def mkdate(datestring): return dt.datetime.strptime(datestring, '%Y-%m-%d').date() if __name__ == "__main__": parser = argparse.ArgumentParser(prog='licenser', add_help=True) parser.add_argument("-i,--input", help="Filename to check", dest="filename", default=None) parser.add_argument("-f,--file_list", help="File containing a list of files", dest="file_list", default=None) parser.add_argument("--repo", help="Repository to consider", dest="repo", default=None) parser.add_argument("-p,--path", help="Folder where to find the files", dest="path", default="") parser.add_argument("-s,--skip-first", help="Skip the first files when using the -f option", dest="skip_first", type=int, default=0) parser.add_argument("-v,--versioning-backend", dest="vc_backend", help="Backend used as versioning system (svn, git, none)") parser.add_argument("configuration_file", help="File containing the configuration, .csv or .db (sqlite)") parser.add_argument("-r,--release-date", help="Date at which the release is prepared", dest='release_date', type=mkdate, default=dt.datetime.now().date()) parser.add_argument("-a,--no-author-check", help="Do not check the author list", dest="no_author_check", action='store_true', default=False) parser.add_argument("-b,--no-brief-check", help="Do not check the brief", dest="no_brief_check", action='store_true', default=False) parser.add_argument("--ignore-threshold", help="Limit of number of line to consider an author from the VC system", dest="ignore_threshold", type=int, default=0) parser.add_argument("--ignore-filled-briefs", help="Do not check the brief if they are not empty", dest="ignore_filled_briefs", action='store_true', default=False) parser.add_argument("--dry-run", help="Do nothing for real", dest='dry_run', action='store_true', default=False) parser.add_argument("-l,--force-license", help="Force a give license", dest="force_license", default=None) parser.add_argument("--force", help="Force to update the header even it is considered up-to-date", dest="force", action='store_true', default=False) - args = parser.parse_args() if (args.filename is None) and (args.file_list is None): print("You should at least give a filename or a file_list") parser.print_help() sys.exit(-1) if (args.filename is not None) and (args.file_list is not None): print("You should give only on of the option filename or file_list") parser.print_help() sys.exit(-1) file_list = [] if args.filename is not None: file_list.append(args.filename) if args.file_list is not None: with open(args.file_list, "r") as fh: file_list = [l.strip() for l in fh] db = pylic.LicenserDB(args.configuration_file) c = 0 t = len(file_list) _kwargs = vars(args) _kwargs.pop("filename", None) for f in file_list: c += 1 print("[{0:>3}%]({2:>3}/{3}) {1}".format(int(float(c) / t * 100), pylic.print_colored(f, attrs=['bold']), c, t), end="") if c <= args.skip_first: print(" ({0})".format(pylic.print_colored("skipped", "red", attrs=['bold']))) continue elif args.force: print(" ({0})".format(pylic.print_colored("forced", "red", attrs=['bold']))) else: print("") if not args.path == "": path = args.path.rstrip("/") + "/" else: path = "" ft = pylic.FileTransformer(path + f, db, **_kwargs) ft.replace_file(args.dry_run) diff --git a/pylicenser/__init__.py b/pylicenser/__init__.py index a0a6665..5f09d8b 100644 --- a/pylicenser/__init__.py +++ b/pylicenser/__init__.py @@ -1,49 +1,50 @@ """ This modules is designed to rewrite header files. It was initially written for Akantu and LibMultiscale projects but could be reused for any other projects. """ def export(definition): """ Decorator to export definitions from sub-modules to the top-level package :param definition: definition to be exported :return: definition """ # Export the definition to the upper layer globals()[definition.__name__] = definition __all__.append(definition.__name__) return definition try: from termcolor import colored as print_colored except ImportError: # noinspection PyUnusedLocal def print_colored(string, *args, **kwargs): return string __all__ = ['export'] + @export def licenser_ask_question(question, possible_answer=None, default='y'): if possible_answer is None: possible_answer = {'y': True, 'n': False} answers = '/'.join([k if not k == default else k.upper() for k in possible_answer.keys()]) answer = None while answer not in possible_answer.keys(): answer = input('{0} ({1})? '.format(question, answers)) answer = answer.lower() if answer == '': return possible_answer[default] # if answer in possible_answer.keys(): return possible_answer[answer] # raise(Exception("\'{0}\' is not a valid answer to the question!".format(answer))) from . import licenser_db from . import version_info from . import file_info from . import author_db from . import copyright_db from . import file_transformer diff --git a/pylicenser/author_db.py b/pylicenser/author_db.py index f562601..9daa331 100755 --- a/pylicenser/author_db.py +++ b/pylicenser/author_db.py @@ -1,125 +1,135 @@ __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" from . import export, licenser_ask_question from builtins import property as _property, tuple as _tuple from operator import itemgetter as _itemgetter from collections import OrderedDict @export class Author(tuple): - 'Author(user_name, e_mail, last_name, first_name, real_email)' + """Author(user_name, e_mail, last_name, first_name, real_email)""" + __slots__ = () _fields = ('user_name', 'e_mail', 'last_name', 'first_name', 'real_email') + # noinspection PyInitNewSignature def __new__(cls, user_name, e_mail, last_name, first_name, real_email): - 'Create new instance of Author(user_name, e_mail, last_name, first_name, real_email)' + """Create new instance of Author(user_name, e_mail, last_name, first_name, real_email)""" return _tuple.__new__(cls, (user_name, e_mail, last_name, first_name, real_email)) + # noinspection PyIncorrectDocstring @classmethod def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new Author object from a sequence or iterable' + """Make a new Author object from a sequence or iterable""" result = new(cls, iterable) if len(result) != 5: raise TypeError('Expected 5 arguments, got %d' % len(result)) return result + # noinspection PyArgumentList + @classmethod + def make(cls, iterable, new=tuple.__new__, len=len): + cls._make(cls, iterable, new=new, len=len) + + # noinspection PyMethodParameters def _replace(_self, **kwds): - 'Return a new Author object replacing specified fields with new values' + """Return a new Author object replacing specified fields with new values""" result = _self._make(map(kwds.pop, ('user_name', 'e_mail', 'last_name', 'first_name', 'real_email'), _self)) if kwds: raise ValueError('Got unexpected field names: %r' % list(kwds)) return result def __repr__(self): + # noinspection PyTypeChecker if self.real_email is not None and len(self.real_email) > 0: email = self.real_email else: email = self.e_mail return "{0} {1} <{2}>".format(self.first_name, self.last_name, email) def __eq__(self, other): return self.user_name == other.user_name def __hash__(self): return hash(self.user_name) def _asdict(self): - 'Return a new OrderedDict which maps field names to their values.' + """Return a new OrderedDict which maps field names to their values.""" return OrderedDict(zip(self._fields, self)) def __getnewargs__(self): - 'Return self as a plain tuple. Used by copy and pickle.' + """Return self as a plain tuple. Used by copy and pickle.""" return tuple(self) user_name = _property(_itemgetter(0), doc='Alias for field number 0') e_mail = _property(_itemgetter(1), doc='Alias for field number 1') last_name = _property(_itemgetter(2), doc='Alias for field number 2') first_name = _property(_itemgetter(3), doc='Alias for field number 3') real_email = _property(_itemgetter(4), doc='Alias for field number 4') @export class AuthorDB: def __init__(self, db): self.__db = db self._authors = db.authors __EMAIL = 0 __USERNAME = 1 def __add_missing_author(self, key, key_type): print( "The author identified by the key \'{0}\' is not know please enter the information for this author.".format( key)) ans = False auth = {'first_name': '', 'last_name': '', 'e_mail': '', 'user_name': ''} while not ans: answer = input("First name [{0}]: ".format(auth['first_name'])) auth['first_name'] = answer if not answer == '' else auth['first_name'] answer = input("Last name [{0}]: ".format(auth['last_name'])) auth['last_name'] = answer if not answer == '' else auth['last_name'] if key_type == self.__EMAIL: answer = input("Username [{0}]: ".format(auth['user_name'])) auth['user_name'] = answer if not answer == '' else auth['user_name'] auth['e_mail'] = key else: auth['user_name'] = key answer = input("E-Mail [{0}]: ".format(auth['e_mail'])) auth['e_mail'] = answer if not answer == '' else auth['e_mail'] print('\nFirst name: {0}\nLast name: {1}\nUsername: {2}\nE-Mail: {3}'.format(auth['first_name'], auth['last_name'], auth['user_name'], auth['e_mail'])) ans = licenser_ask_question("Are this information correct") if ans: auth = Author(auth['user_name'], auth['e_mail'], auth['last_name'], auth['first_name'], '') self.__db.add_author(auth) self._authors.append(auth) return auth def is_author_in_ignore_list(self, author): return self.__db.is_author_in_ignore_list(author) def find_by_user_name(self, user_name): find = [author for author in self._authors if author.user_name == user_name] if len(find) == 0: find.append(self.__add_missing_author(user_name, self.__USERNAME)) return find[0] def find_by_email(self, e_mail): find = [author for author in self._authors if author.e_mail == e_mail] if len(find) == 0: find.append(self.__add_missing_author(e_mail, self.__EMAIL)) return find[0] diff --git a/pylicenser/file_transformer.py b/pylicenser/file_transformer.py index 0e0ec1b..9b7c577 100755 --- a/pylicenser/file_transformer.py +++ b/pylicenser/file_transformer.py @@ -1,254 +1,295 @@ # -*- coding: utf-8 -*- __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" import datetime as dt from . import licenser_ask_question from . import author_db as adb from . import copyright_db as cdb from . import export from . import file_info as fi from . import print_colored from . import version_info as vc import os from pygments import highlight from pygments.lexers.diff import DiffLexer from pygments.formatters.terminal256 import Terminal256Formatter from pygments.formatters.terminal import TerminalFormatter @export class FileTransformer(object): """ Class that reformat the headers """ + __keep_authors = None + _brief = None + __ignore = False _new_header = None def __init__(self, filename, db, release_date=dt.datetime.now().date(), no_author_check=False, no_brief_check=False, force=False, **kwargs): self.__filename = filename self.__release_date = release_date self.__db = db + self.__no_brief_check = no_brief_check + self.__no_author_check = no_author_check if "vc_backend" in kwargs and kwargs["vc_backend"] is not None: vc_back = kwargs["vc_backend"] else: vc_back = db.versioning_backend if vc_back != "none": self._date_style = self.__db.get_config('date_style') self.__repo = self.__db.get_config('repo') self._vc_info = vc.VersionInfo(self.__repo, self.__filename, self.__db.get_list_of_ignore_emails(), backend=vc_back, rev_to=release_date) self.__name = self._vc_info.name self.__filename = self.__repo + '/' + self.__name self._creation_date = self._vc_info.creation_date self._last_modif = self._vc_info.last_modification_date else: self._creation_date = None self._last_modif = None self._date_style = None self._vc_info = None # Check the authors self.__author_db = adb.AuthorDB(db) try: self._file = fi.FileInfo(self.__filename, self.__author_db) except NotImplementedError: print("File {0} ignored due to {1}".format(self._vc_info.name, print_colored('unknown type', 'red', attrs=['bold']))) self.__ignore = True return rev_from = None license_id = None file_maj = None + self.__oldest_name = "" + if vc_back != "none": - file_maj = self.__db.find_file(self._vc_info.oldest_name) + self.__oldest_name = self._vc_info.oldest_name + file_maj = self.__db.find_file(self.__oldest_name) if file_maj is not None: rev_from, license_id = file_maj if not force and rev_from.date() >= release_date: print("File {0} ignored due to recent modifications ({1})".format(self._vc_info.name, print_colored( rev_from.strftime( "%Y-%m-%d"), 'red', attrs=['bold']))) self.__ignore = True return - if not no_author_check: + if not self.__no_author_check: self._vc_authors = self._vc_info.authors self._vc_info.populate(self.__author_db, rev_to=self.__release_date, rev_from=rev_from) self._vc_authors = self._vc_info.authors self._f_authors = self._file.authors self._real_authors = self.__compare_authors(self._f_authors, self._vc_authors, - file_is_good=(file_maj is not None), + file_is_good=False, **kwargs) if self._real_authors - self._f_authors: print("Added authors:\n{0}".format( "\n".join([" @author {0}".format(author) for author in self._real_authors - self._f_authors]))) if self._f_authors - self._real_authors: print("Removed authors:\n{0}".format( "\n".join([" @author {0}".format(author) for author in self._f_authors - self._real_authors]))) else: self._real_authors = self._file.authors # Check the brief - if not no_brief_check: + if not self.__no_brief_check: self.__check_brief(**kwargs) else: self._brief = self._file.get_brief() - if "force_license" in kwargs: + if "force_license" in kwargs and kwargs['force_license'] is not None: license_id = kwargs["force_license"] # Getting the license content self.__copyright_base = cdb.CopyrightDB(db) if license_id is None: copyright_policy = self.__db.get_config('copyright_policy') if copyright_policy == 'creation_date': cdate = self._creation_date.date() elif copyright_policy == 'release_date': cdate = self.__release_date else: cdate = None self._lic = self.__copyright_base.find_by_date(cdate) else: self._lic = self.__copyright_base.find_by_id(license_id) if self._lic is None: raise ("The license with the id {0} is not defined".format(license_id)) date_format = self.__db.get_config('date_format') # Generates the new header file self._new_header = self._file.generate_header( real_authors=self._real_authors, copyright_txt=self._lic.text, date_format=date_format, date_style=self._date_style, last_modification_date=self._last_modif, creation_date=self._creation_date, brief=self._brief) # noinspection PyPep8Naming,PyUnusedLocal def __compare_authors(self, file_known_tmp, vc_known, file_is_good=False, ignore_threshold=0, **kwargs): file_known = file_known_tmp if len(file_known) == 0 and len(vc_known) == 1: file_known = vc_known if not file_is_good: - file_only = file_known - vc_known + last_keep = None + if len(self.__oldest_name) != 0: + last_keep = self.__db.get_last_keep_authors(self.__oldest_name, self.__author_db) + if last_keep is None: + last_keep = [] + + for author in last_keep: + print("Keeping @author {0} ({1})".format(author, + print_colored("previously validated", + 'red', + attrs=['bold']))) + + file_only = file_known - vc_known - set(last_keep) to_remove = set() if file_only: KEEP = 0 REMOVE = 1 for author in file_only: answer = licenser_ask_question( "Do you want to remove" + " @author {0}".format(author), {'k': KEEP, 'r': REMOVE}, 'k') if answer == REMOVE: to_remove.add(author) file_known = file_known - to_remove + self.__keep_authors = file_known - to_remove - vc_known vc_only = vc_known - file_known if vc_only: to_add = set() YES = 0 NO = 1 DIFF = 2 for author in vc_only: modifications, stats = self._vc_info.modifications_by_author(author.user_name) number_of_modified_lines = sum([len(e[2]) for e in modifications]) number_of_modifications = len(modifications) if number_of_modified_lines <= ignore_threshold or number_of_modified_lines == number_of_modifications: print('Potential new author' + ' @author {0} '.format(author) + '({0} modifications, lines count {1}) {2}'.format(number_of_modifications, number_of_modified_lines, print_colored("[ignored do to threshold]", 'red', attrs=['bold']))) continue answer = DIFF while answer == DIFF: answer = licenser_ask_question( 'Do you want to add' + ' @author {0}'.format(print_colored(author, "blue")) + ' ({0} modifications, lines count {1}) '.format(number_of_modifications, number_of_modified_lines), {'d': DIFF, 'n': NO, 'y': YES}, 'n') if answer == DIFF: for info, modif, nb_lines, diff_stats in modifications: if len(modif) > 2: print(info) formatter = (Terminal256Formatter if '256color' in os.environ.get('TERM', '') else TerminalFormatter) print(highlight(modif, DiffLexer(), formatter())) elif answer == YES: to_add.add(author) file_known = file_known.union(to_add) return file_known # noinspection PyUnusedLocal def __check_brief(self, ignore_filled_briefs=False, **kwargs): self._brief = self._file.get_brief() if self._brief and not ignore_filled_briefs: - keep_brief = licenser_ask_question("\"{0}\"\nDo you want to keep this brief".format(self._brief)) - if not keep_brief: - self._brief = False + if len(self.__oldest_name) != 0: + old_brief = self.__db.get_last_brief(self.__oldest_name) + else: + old_brief = None + + if old_brief is not None and old_brief == self._brief: + print('Brief: {0} ({1})'.format(self._brief, print_colored("previously validated", + 'red', attrs=['bold']))) + else: + keep_brief = licenser_ask_question("\"{0}\"\nDo you want to keep this brief".format(self._brief)) + if not keep_brief: + self._brief = False if not self._brief: res = input("Please type the brief for file: ") brief = [] while res is not "": brief.append(res) res = input("> ") self._brief = "\n".join(brief) def replace_file(self, dry_run=True): if self.__ignore: return if dry_run: print(self._new_header) else: if self._vc_info is not None: - self.__db.update_file(self._vc_info.oldest_name, self._lic.lid, self.__release_date) + self.__db.update_license_file(self.__oldest_name, + self._lic.lid, + self.__release_date) + if self._brief is not None and not self.__no_brief_check: + self.__db.update_brief_file(self.__oldest_name, + self._brief, + self.__release_date) + if self.__keep_authors is not None and not self.__no_author_check: + self.__db.update_keep_authors_file(self.__oldest_name, + self.__keep_authors, + self.__release_date) + self._file.replace_file(self._new_header) diff --git a/pylicenser/licenser_db.py b/pylicenser/licenser_db.py index 241d1b0..d365658 100755 --- a/pylicenser/licenser_db.py +++ b/pylicenser/licenser_db.py @@ -1,259 +1,293 @@ # -*- coding: utf-8 -*- __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" from . import export from . import author_db as adb import sqlite3 as lite from collections import namedtuple from datetime import datetime as dt import os CopyRight = namedtuple('CopyRight', ['lid', 'on_modifications_txt', 'start', 'end', 'text']) SQLITE = 0 CSV = 1 @export class LicenserDB: _project = "" _versioning_backend = "svn" _configs = { 'project': "", 'versioning_backend': 'svn', 'date_format': "%a %b %d %H:%M:%S %Y", 'date_style': 'last_modification', 'copyright_policy': 'creation_date' } def __new__(cls, filename, backend=None): """ Factory constructor depending on the chosen backend """ if backend is None: _filepath = os.path.expanduser(filename) garbage, ext = os.path.splitext(os.path.basename(_filepath)) if ext == ".csv": backend = CSV elif ext == ".db": backend = SQLITE else: raise (Exception("Unknown file format")) if backend == SQLITE: obj = super().__new__(LicenserSQLITEDB) elif backend == CSV: obj = super().__new__(LicenserCSVDB) else: raise (Exception("Not a known backend")) return obj @property def authors(self): return None def add_author(self, author): pass @property def copyrights(self): return None @property def project(self): return self._configs['project'] @property def versioning_backend(self): return self._configs['versioning_backend'] def get_config(self, conf): return self._configs[conf] if conf in self._configs else None def find_file(self, name): return None - def update_file(self, filename, license_id, update=dt.now().date()): + def update_license_file(self, filename, license_id, update=dt.now().date()): pass def is_author_in_ignore_list(self, author): return False @export class LicenserSQLITEDB(LicenserDB): # noinspection PyTypeChecker def __init__(self, filename): self.__connection = lite.connect(filename) self.__connection.row_factory = lite.Row self.__cursor = self.__connection.cursor() with self.__connection: self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'config' ''' + ''' ('property' TEXT, 'value' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors' ''' + ''' ('uid' TEXT NOT NULL, 'last_name' TEXT NOT NULL, 'first_name' TEXT NOT NULL,''' + ''' 'email' TEXT NOT NULL, 'real_email' TEXT DEFAULT '')''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors_ignore_list' ''' + ''' ('uid' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'copyrights' ''' + ''' ('lid' TEXT, 'from' TEXT, 'to' TEXT, 'on_modifications_txt', 'text' BLOB)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'files' ''' + - ''' ('filename' TEXT, 'last_modif' TEXT, 'license_id' TEXT,''' + + ''' ('filename' TEXT, 'last_modif' TEXT, 'property' TEXT, 'value' TEXT,''' + ''' PRIMARY KEY('filename'))''') - self.__cursor.execute('''SELECT * FROM 'config' AS c''') properties = self.__cursor.fetchall() for prop in properties: _property = prop['property'] self._configs[_property] = prop['value'] @property def authors(self): with self.__connection: self.__cursor.execute( '''SELECT a.'uid', a.'email', a.'last_name', a.'first_name', a.'real_email' ''' + ''' FROM 'authors' AS a''') authors = self.__cursor.fetchall() return [adb.Author(*[str(e) if e is not None else e for e in author]) for author in authors] def is_author_in_ignore_list(self, author): with self.__connection: self.__cursor.execute( '''SELECT a.'uid' ''' + ''' FROM 'authors_ignore_list' AS a''' + ''' WHERE a.'uid' = :uid''', {"uid": author.user_name}) authors = self.__cursor.fetchall() return len(authors) > 0 # noinspection PyTypeChecker def get_list_of_ignore_emails(self): with self.__connection: self.__cursor.execute( '''SELECT a.'email' ''' + ''' FROM 'authors_ignore_list' AS i, 'authors' AS a''' + ''' WHERE a.'uid' = i.'uid' ''') authors = self.__cursor.fetchall() return [str(author['email']) for author in authors] def add_author(self, author): with self.__connection: self.__cursor.execute( '''INSERT INTO 'authors' ('uid', 'first_name', 'last_name', 'email')''' + '''VALUES (:uid, :first_name, :last_name, :email)''', {'uid': str(author.user_name), 'last_name': str(author.last_name), 'first_name': str(author.first_name), 'email': str(author.e_mail)}) # noinspection PyTypeChecker @property def copyrights(self): with self.__connection: self.__cursor.execute( '''SELECT c.'lid', c.'on_modifications_txt', c.'from', c.'to', c.'text' ''' + ''' FROM 'copyrights' AS c''') copyrights = self.__cursor.fetchall() return [CopyRight(copy['lid'], str(copy['on_modifications_txt']), dt.strptime(copy['from'], "%Y-%m-%d").date(), dt.strptime(copy['to'], "%Y-%m-%d").date(), - str(copy['text']) if not type(copy['text']) == bytes else copy['text'].decode('utf-8')) for copy in copyrights] + str(copy['text']) if not type(copy['text']) == bytes else copy['text'].decode('utf-8')) + for copy in copyrights] # noinspection PyTypeChecker def find_file(self, filename): + return self._get_file_property(filename, 'license_id') + + def _update_file_property(self, filename, property_name, value, update_time): with self.__connection: self.__cursor.execute( - '''SELECT f.'last_modif', f.'license_id' ''' + - ''' FROM 'files' AS f WHERE f.'filename'=:filename''', - {'filename': filename}) - res = self.__cursor.fetchone() - if res is not None: - return [dt.strptime(res['last_modif'], "%Y-%m-%d"), res['license_id']] + '''INSERT OR REPLACE INTO 'files' ('filename', 'last_modif', 'property', 'value') ''' + + ''' VALUES (:filename, :update, :property, :value)''', + {'filename': filename, + 'property': property_name, + 'value': value, + 'update': update_time.strftime("%Y-%m-%d")}) - def update_file(self, filename, license_id, update=dt.now().date()): + # noinspection PyTypeChecker + def _get_file_property(self, filename, property_name): with self.__connection: self.__cursor.execute( - '''INSERT OR REPLACE INTO 'files' ('filename', 'last_modif', 'license_id') ''' + - ''' VALUES (:filename, :update, :license_id)''', + '''SELECT f.'last_modif', f.'value' ''' + + ''' FROM 'files' AS f WHERE f.'filename'=:filename AND f.'property'=:property ''', {'filename': filename, - 'license_id': license_id, - 'update': update.strftime("%Y-%m-%d")}) + 'property': property_name}) + res = self.__cursor.fetchone() + if res is not None: + return [dt.strptime(res['last_modif'], "%Y-%m-%d"), res['value']] + + def update_license_file(self, filename, license_id, update=dt.now().date()): + self._update_file_property(filename, 'license_id', license_id, update) + + def update_brief_file(self, filename, brief, update=dt.now().date()): + self._update_file_property(filename, 'brief', brief, update) + + def update_keep_authors_file(self, filename, keep_authors, update=dt.now().date()): + self._update_file_property(filename, + 'authors_keep', + ';'.join([auth.user_name for auth in keep_authors]), + update) + + def get_last_brief(self, filename): + brief = self._get_file_property(filename, 'brief') + if brief is not None: + return brief[1] + + def get_last_keep_authors(self, filename, author_db): + res = self._get_file_property(filename, 'authors_keep') + if res is not None: + authors = res[1].split(";") + keep_authors = [] + for auth in authors: + if len(auth) != 0: + a = author_db.find_by_user_name(auth) + keep_authors.append(a) + return keep_authors class LicenserCSVDB(LicenserDB): __author_file = '' __copyright_file = '' __treated_files = dict() # noinspection PyUnusedLocal def __init__(self, filename, **kwargs): self.__config_file = filename with open(self.__config_file) as fh: split_lines = [line.strip().split(';') for line in fh] if len(split_lines) > 1: raise (Exception("Too many lines in the configuration file")) self._configs['project'], self.__author_file, self.__copyright_file, self._configs['versioning_backend'] = [ token.strip('"') for token in split_lines[0]] with open('treated_files.list', 'r') as f: for l in f: f, date, lid = l.split(";") f = f.strip() date = dt.strptime(date, "%Y-%m-%d") self.__treated_files[f] = [date, lid.strip()] @property def authors(self): with open(self.__author_file) as fh: next(fh) split_lines = [line.strip().split(';') for line in fh] - return [adb.Author._make((token.strip('"') for token in line)) for line in split_lines] + return [adb.Author.make((token.strip('"') for token in line)) for line in split_lines] def add_author(self, author): with open(self.__author_file, 'a') as fh: fh.write(';'.join(['\"{0}\"'.format(val) for val in author]) + '\n') def find_file(self, filename): if filename in self.__treated_files: return self.__treated_files[filename] return None - def update_file(self, filename, license_id, update=dt.now().date()): + def update_license_file(self, filename, license_id, update=dt.now().date()): self.__treated_files[filename] = [update.strftime("%Y-%m-%d"), license_id] with open('treated_files.list', 'w') as f: f.write("\n".join([";".join(f) for f in self.__treated_files.items])) @property def copyrights(self): with open(self.__copyright_file) as fh: split_lines = [line.strip().split(';') for line in fh] tab = ([[token.strip('"') for token in line] for line in split_lines]) return [CopyRight._make([lid, on_modif, dt.strptime(start, "%d-%m-%Y").date(), dt.strptime(end, "%d-%m-%Y").date(), "".join([line for line in open(license_file)])]) for lid, on_modif, start, end, license_file in tab] diff --git a/pylicenser/vcs/git.py b/pylicenser/vcs/git.py index 3bb1009..edcf166 100644 --- a/pylicenser/vcs/git.py +++ b/pylicenser/vcs/git.py @@ -1,136 +1,139 @@ """ Gather info for git based repos """ __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" import git import binascii import datetime as dt from .. import export from .. import version_info -from .. import print_colored @export class GITInfo(version_info.VersionInfo): """ git implementation of the version info class """ __repo = None __other_names = [] def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs): - super().__init__(repo, filename, **kwargs) + super().__init__(repo, filename, ignore_list, **kwargs) self.__repo = git.Repo(self._repo) self._name = filename.replace(self._repo, '') commits = self._list_commits(rev_to=rev_to, ignore_list=ignore_list) self._modification_dates = [] for c in commits: if c.author.email not in ignore_list: self._modification_dates.append(dt.datetime.fromtimestamp(c.authored_date)) self.__other_names = self._list_names(commits) # noinspection PyIncorrectDocstring def populate(self, author_db, rev_from=None, rev_to=None): """ populate the internal variables """ _commits = self._list_commits(rev_to=rev_to, rev_from=rev_from) for c in _commits: email = c.author.email a = author_db.find_by_email(email) auth = a.user_name if auth not in self._author_list: self._author_list[auth] = [] if not author_db.is_author_in_ignore_list(a): self._authors.add(a) d = dt.datetime.fromtimestamp(c.authored_date) rev = c.hexsha msg = c.message self._author_list[auth].append((d, rev, msg)) def get_modifications(self, revision): hexsha = binascii.unhexlify(revision) commit = git.objects.commit.Commit(self.__repo, hexsha) stats = commit.stats.total patches = [] for parent in commit.parents: diffs = commit.diff(parent, R=True, create_patch=True, ignore_space_change=True, diff_algorithm='minimal', paths=[self.__other_names]) patches.extend([d.diff.decode('utf-8', errors="surrogateescape") for d in diffs]) return '\n'.join(patches), stats def _list_commits(self, rev_to=None, rev_from=None, ignore_list=None): if ignore_list is None: ignore_list = [] _args = {'follow': True, 'all': True, 'pretty': 'tformat:%H'} if rev_from is not None: _args['since'] = rev_from.strftime("%Y-%m-%d") if rev_to is not None: _args['until'] = rev_to.strftime("%Y-%m-%d") # git log can follow better renames in case there where badly done (add/rm instead of mv) git_cmd = git.cmd.Git(working_dir=self.__repo.working_dir) str_c = git_cmd.log(self._name, **_args) - binhashes = (binascii.unhexlify(c) for c in str_c.split('\n')) + if len(str_c) != 0: + binhashes = (binascii.unhexlify(c) for c in str_c.split('\n')) + else: + binhashes = [] + list_commits = [] for b in binhashes: c = git.objects.commit.Commit(self.__repo, b) if c.author.email not in ignore_list: list_commits.append(c) if len(list_commits) == 0: del _args['follow'] del _args['pretty'] return list(self.__repo.iter_commits(paths=self._name, **_args)) # list_commits = list(self.__repo.iter_commits(paths=self._name, **_args)) return list_commits def _list_names(self, commits): """ Finds all the names of a given file """ names = [self._name] for c in commits: for p in c.parents: diffs = c.diff(p, R=True, M=True) for d in diffs.iter_change_type('R'): if d.renamed and d.rename_to in names: new_path = d.rename_from self._names[new_path] = c.hexsha names.append(new_path) if len(self._names.keys()) == 0: c = commits[-1] self._names[self._name] = c.hexsha names.reverse() return names @property def oldest_name(self): return self.__other_names[0] diff --git a/pylicenser/vcs/svn.py b/pylicenser/vcs/svn.py index 2285e1d..9a5678f 100644 --- a/pylicenser/vcs/svn.py +++ b/pylicenser/vcs/svn.py @@ -1,123 +1,122 @@ """ Gather info for subversion based repos """ __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" from .. import export from .. import print_colored from .. import version_info import pysvn import re import datetime as dt @export class SVNInfo(version_info.VersionInfo): """ subversion implementation of the version info class """ - # noinspection PyUnusedLocal def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs): - super().__init__(repo, filename, **kwargs) + super().__init__(repo, filename, ignore_list, **kwargs) self._client = pysvn.Client() self._root_url = self._client.root_url_from_path(repo) self._infos = self._client.info(self._filename) if rev_to is not None: rev_start = pysvn.Revision(pysvn.opt_revision_kind.date, rev_to.strftime('%s')) else: rev_start = pysvn.Revision(pysvn.opt_revision_kind.head) self._name = self._infos.url.replace(self._root_url, '') self._logs = self._client.log(self._filename, strict_node_history=False, discover_changed_paths=True, revision_start=rev_start) prev_name = self._name self._logs = sorted(self._logs, key=lambda log: log["revision"].number, reverse=True) for entry in self._logs: d = dt.datetime.fromtimestamp(entry["date"]) self._modification_dates.append(d) # checking if the name as changed old_names = [ ch_path for ch_path in entry["changed_paths"] if (re.match("{0}.*".format(ch_path["path"]), prev_name)) ] if len(old_names) > 0 and old_names[0]["copyfrom_path"] is not None: m = re.match("{0}(.*)".format(old_names[0]["path"]), prev_name) tmp_prev_name = prev_name prev_name = "{0}{1}".format(old_names[0]["copyfrom_path"], m.group(1) if m else "") self._names[old_names[0]["copyfrom_revision"].number] = tmp_prev_name self._names[0] = prev_name # print("\n".join(["{0} {1}".format(n, r) for r, n in self._names.items() ])) @property def oldest_name(self): return self._names[0] def populate(self, author_db, rev_from=None, rev_to=None): if rev_from is not None: rev_end = pysvn.Revision(pysvn.opt_revision_kind.date, rev_from.strftime('%s')) else: rev_end = pysvn.Revision(pysvn.opt_revision_kind.number, 0) if rev_to is not None: rev_start = pysvn.Revision(pysvn.opt_revision_kind.date, rev_to.strftime('%s')) else: rev_start = pysvn.Revision(pysvn.opt_revision_kind.head) self._logs = self._client.log(self._filename, strict_node_history=False, revision_start=rev_start, revision_end=rev_end) self._logs = sorted(self._logs, key=lambda log: log["revision"].number) for entry in self._logs: auth = entry["author"] rev = entry["revision"] d = dt.datetime.fromtimestamp(entry["date"]) msg = entry["message"] if "message" in entry else "" self._revisions.append(rev) if auth not in self._author_list: self._author_list[auth] = [] a = author_db.find_by_user_name(auth) if not author_db.is_author_in_ignore_list(a): self._authors.add(a) self._author_list[auth].append((d, rev, msg)) def __find_previous_release(self, r): revs = [ rev.number for rev in self._revisions if rev.number < r] rev = 0 if len(revs) == 0 else max(revs) return pysvn.Revision(pysvn.opt_revision_kind.number, rev) def get_modifications(self, revision): r_pre = pysvn.Revision(pysvn.opt_revision_kind.number, revision.number -1) # self.__find_previous_release(revision.number) relevant_rev = max([ n for n in self._names.keys() if revision.number > n ]) try: # print("{0} r{1}:{2}".format(self._root_url + self._names[relevant_rev], # r_pre.number, # revision.number)) res_diff = self._client.diff("/tmp", self._root_url + self._names[relevant_rev], revision1=r_pre, revision2=revision, diff_options=['-b', '-w']) stats = {'modifications': len(res_diff)} return res_diff,stats except Exception as e: pass return "" diff --git a/pylicenser/version_info.py b/pylicenser/version_info.py index 661c482..f315e39 100755 --- a/pylicenser/version_info.py +++ b/pylicenser/version_info.py @@ -1,89 +1,89 @@ # -*- coding: utf-8 -*- __author__ = "Guillaume Anciaux, and Nicolas Richart" __copyright__ = "Copyright (C) 2015, EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory " \ "(LSMS - Laboratoire de Simulation en Mécanique des Solides)" __credits__ = ["Guillaume Anciaux", "Nicolas Richart"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Nicolas Richart" __email__ = "nicolas.richart@epfl.ch" from . import print_colored from . import export import os @export class VersionInfo: """Generic class handling the communication with the versioning system""" # members _authors = set() _author_list = dict() _modification_dates = [] _name = "" _repo = "" _filename = "" _revisions = [] _names = dict() def __new__(cls, repo, filename, ignore_list, backend=None, **kwargs): """ Factory constructor depending on the chosen backend """ if backend == 'svn': from .vcs import svn return super().__new__(svn.SVNInfo) elif backend == 'git': from .vcs import git return super().__new__(git.GITInfo) else: raise Exception("{0} not a known backend".format(backend)) # noinspection PyUnusedLocal - def __init__(self, repo, filename, **kwargs): + def __init__(self, repo, filename, ignore_list, **kwargs): self._repo = os.path.expanduser(repo) self._filename = os.path.expanduser(filename) @property def creation_date(self): return min(self._modification_dates) @property def last_modification_date(self): return max(self._modification_dates) @property def authors(self): return self._authors def number_of_modifications(self, author): return len(self._author_list[author]) def modifications_by_author(self, author): auth = self._author_list[author] res = [] stats = {} for d, r, msg in auth: date = d.strftime("%d-%m-%Y") info = print_colored("@ {0} rev {1} msg {2}".format(date, r, msg), 'blue', attrs=['bold']) modif, _stats = self.get_modifications(r) - for k,v in _stats.items(): + for k, v in _stats.items(): if k in stats: stats[k] += v else: stats[k] = v res.append([info, modif, modif.split("\n"), _stats]) return res, stats @property def name(self): return self._name @property def oldest_name(self): return self._name def get_modifications(self, revision): pass