diff --git a/pylicenser/file_transformer.py b/pylicenser/file_transformer.py index 9f20650..d0712b7 100755 --- a/pylicenser/file_transformer.py +++ b/pylicenser/file_transformer.py @@ -1,359 +1,360 @@ # -*- coding: utf-8 -*- """ file_transformer.py: Coordination of the different transformations""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" import datetime as dt from . import licenser_ask_question from . import author_db as adb from . import copyright_db as cdb from . import export from . import file_info as fi from . import print_colored from . import version_info as vc import os from pygments import highlight from pygments.lexers.diff import DiffLexer from pygments.formatters.terminal256 import Terminal256Formatter from pygments.formatters.terminal import TerminalFormatter @export class FileTransformer(object): """ Class that reformat the headers """ __keep_authors = None _brief = None __ignore = False _new_header = None def __init__(self, filename, db, release_date=dt.datetime.now().date(), no_author_check=False, no_brief_check=False, force=False, version=None, **kwargs): self.__filename = filename self.__release_date = release_date self.__db = db self.__no_brief_check = no_brief_check self.__no_author_check = no_author_check self.__version = version # Check the authors self.__author_db = adb.AuthorDB(db) if "vc_backend" in kwargs and kwargs["vc_backend"] is not None: vc_back = kwargs["vc_backend"] else: vc_back = db.versioning_backend if vc_back != "none": self._date_style = self.__db.get_config('date_style') self.__repo = self.__db.get_config('repo') self._vc_info = vc.VersionInfo( self.__repo, self.__filename, - self.__db.get_list_of_ignore_emails(), + authors_ignore_list=self.__db.get_list_of_ignore_emails(), + commits_ignore_list=self.__db.list_of_commits_to_ignore(), backend=vc_back, rev_to=release_date) self.__name = self._vc_info.name self.__filename = self.__repo + '/' + self.__name self._creation_date = self._vc_info.creation_date self._last_modif = self._vc_info.last_modification_date else: self._creation_date = None self._last_modif = None self._date_style = None self._vc_info = None try: self._file = fi.FileInfo(self.__filename, self.__author_db) except NotImplementedError: print("File {0} ignored due to {1}".format( self._vc_info.name, print_colored('unknown type', 'red', attrs=['bold']))) self.__ignore = True return rev_from = None license_id = None self.__oldest_name = "" if vc_back != "none": self.__oldest_name = self._vc_info.oldest_name file_maj = self.__db.find_file(self.__oldest_name) if file_maj is not None: rev_from, license_id = file_maj if not force and rev_from.date() >= release_date: print(("File {0} ignored due to recent " + "modifications ({1})").format( self._vc_info.name, print_colored( rev_from.strftime("%Y-%m-%d"), 'red', attrs=['bold']))) self.__ignore = True return if not self.__no_author_check: self._vc_authors = self._vc_info.authors self._vc_info.populate(self.__author_db, rev_to=self.__release_date, rev_from=rev_from) self._vc_authors = self._vc_info.authors self._f_authors = self._file.authors self._real_authors = self.__compare_authors(self._f_authors, self._vc_authors, file_is_good=False, **kwargs) if self._real_authors - self._f_authors: print("Added authors:\n{0}".format( "\n".join([ " @author {0}".format(author) for author in self._real_authors - self._f_authors ]))) if self._f_authors - self._real_authors: print( "Removed authors:\n{0}".format( "\n".join([ " @author {0}".format(author) for author in self._f_authors - self._real_authors ]))) else: self._real_authors = self._file.authors if len(self._real_authors) == 0: res = input("This file as no author please provide " + "the username of the authors: ") while res != "": self._real_authors.add(self.__author_db.find_by_user_name(res)) res = input("> ") # Check the brief if not self.__no_brief_check: self.__check_brief(**kwargs) else: self._brief = self._file.get_brief() if "force_license" in kwargs and kwargs['force_license'] is not None: license_id = kwargs["force_license"] # Getting the license content self.__copyright_base = cdb.CopyrightDB(db) if license_id is None: copyright_policy = self.__db.get_config('copyright_policy') if copyright_policy == 'creation_date': cdate = self._creation_date.date() elif copyright_policy == 'release_date': cdate = self.__release_date else: cdate = None self._lic = self.__copyright_base.find_by_date(cdate) else: self._lic = self.__copyright_base.find_by_id(license_id) if self._lic is None: raise ("The license with the id {0} is not defined".format( license_id)) date_format = self.__db.get_config('date_format') version_config = self.__db.get_config('date_format') # Generates the new header file _header_params = { 'authors': self._real_authors, 'copyright_txt': self._lic.text, 'date_format': date_format, 'date_style': self._date_style, 'last_modification_date': self._last_modif, 'creation_date': self._creation_date, 'brief': self._brief, 'version': self.__version } if version_config == 1: self._new_header = self._file.generate_header( **_header_params) else: self._new_header = self._file.generate_header_2( license_header=self._lic.header, need_header=self._lic.need_header, license=self._lic.license, **_header_params ) # noinspection PyPep8Naming,PyUnusedLocal def __compare_authors(self, file_known_tmp, vc_known, file_is_good=False, ignore_threshold=0, yes=False, **kwargs): file_known = file_known_tmp if len(file_known) == 0 and len(vc_known) == 1: file_known = vc_known if not file_is_good: last_keep = None if len(self.__oldest_name) != 0: last_keep = self.__db.get_last_keep_authors( self.__oldest_name, self.__author_db) if last_keep is None: last_keep = [] for author in last_keep: print("Keeping @author {0} ({1})".format( author, print_colored("previously validated", 'red', attrs=['bold']))) file_only = file_known - vc_known - set(last_keep) to_remove = set() if file_only: KEEP = 0 REMOVE = 1 for author in file_only: if not yes: answer = licenser_ask_question( "Do you want to remove @author {0}".format(author), {'k': KEEP, 'r': REMOVE}, 'k') if answer == REMOVE: to_remove.add(author) else: print(("Do you want to remove " + "@author {0}? ({1}/r)").format( author, print_colored('K', 'red', attrs=['bold']) )) file_known = file_known - to_remove self.__keep_authors = file_known - to_remove vc_only = vc_known - file_known if vc_only: to_add = set() YES = 0 NO = 1 DIFF = 2 for author in vc_only: modifications, stats = self._vc_info.modifications_by_author( author.user_name) number_of_modified_lines = sum( [len(e[2]) for e in modifications]) number_of_modifications = len(modifications) if ((number_of_modified_lines <= ignore_threshold) or (number_of_modified_lines == number_of_modifications)): print('Potential new author' + ' @author {0} '.format(author) + '({0} modifications, lines count {1}) {2}'.format( number_of_modifications, number_of_modified_lines, print_colored( "[ignored do to threshold {0}]".format( ignore_threshold), 'red', attrs=['bold']))) continue answer = DIFF while answer == DIFF: answer = licenser_ask_question( 'Do you want to add' + ' @author {0}'.format(print_colored(author, "blue")) + ' ({0} modifications, lines count {1}) '.format( number_of_modifications, number_of_modified_lines), {'d': DIFF, 'n': NO, 'y': YES}, 'n') if answer == DIFF: for info, modif, nb_lines, diff_stats in modifications: if len(modif) > 2: print(info) formatter = ( Terminal256Formatter if '256color' in os.environ.get('TERM', '') else TerminalFormatter) print(highlight(modif, DiffLexer(), formatter())) elif answer == YES: to_add.add(author) file_known = file_known.union(to_add) return file_known # noinspection PyUnusedLocal def __check_brief(self, ignore_filled_briefs=False, yes=False, **kwargs): self._brief = self._file.get_brief() if self._brief and not ignore_filled_briefs: if len(self.__oldest_name) != 0: old_brief = self.__db.get_last_brief(self.__oldest_name) else: old_brief = None if old_brief is not None and old_brief == self._brief: print('Brief: {0} ({1})'.format( self._brief, print_colored("previously validated", 'red', attrs=['bold']))) else: if not yes: keep_brief = licenser_ask_question( ("\"{0}\"\n" + "Do you want to keep this brief").format(self._brief)) if not keep_brief: self._brief = False else: print(("\"{0}\"\nDo you want to keep" + " this brief ? ({1}/n)").format( self._brief, print_colored('Y', 'red', attrs=['bold']))) if not self._brief: res = input("Please type the brief for file: ") brief = [] while res != "": brief.append(res) res = input("> ") self._brief = "\n".join(brief) def replace_file(self, dry_run=True): if self.__ignore: return if dry_run: print(self._new_header) else: if self._vc_info is not None: self.__db.update_license_file(self.__oldest_name, self._lic.lid, self.__release_date) if self._brief is not None and not self.__no_brief_check: self.__db.update_brief_file(self.__oldest_name, self._brief, self.__release_date) if ((self.__keep_authors is not None) and (not self.__no_author_check)): self.__db.update_keep_authors_file(self.__oldest_name, self.__keep_authors, self.__release_date) self._file.replace_file(self._new_header) diff --git a/pylicenser/licenser_db.py b/pylicenser/licenser_db.py index a0599e0..fa28e95 100755 --- a/pylicenser/licenser_db.py +++ b/pylicenser/licenser_db.py @@ -1,379 +1,392 @@ # -*- coding: utf-8 -*- """ licenser_db.py: Interaction with the configuration DB (SQLite or CSV)""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" from . import export from . import author_db as adb import sqlite3 as lite from collections import namedtuple from datetime import datetime as dt import os CopyRight = namedtuple('CopyRight', ['lid', 'on_modifications_txt', 'start', 'end', 'text']) CopyRight2 = namedtuple('CopyRight2', ['lid', 'license', 'on_modifications_txt', 'start', 'end', 'text', 'need_header', 'header']) SQLITE = 0 CSV = 1 @export class LicenserDB: _project = "" _versioning_backend = "svn" _configs = { 'project': "", 'versioning_backend': 'svn', 'date_format': "%a %b %d %H:%M:%S %Y", 'date_style': 'last_modification', 'copyright_policy': 'creation_date' } def __new__(cls, filename, backend=None): """ Factory constructor depending on the chosen backend """ if backend is None: _filepath = os.path.expanduser(filename) garbage, ext = os.path.splitext(os.path.basename(_filepath)) if ext == ".csv": backend = CSV elif ext == ".db": backend = SQLITE else: raise (Exception("Unknown file format")) if backend == SQLITE: obj = super().__new__(LicenserSQLITEDB) elif backend == CSV: obj = super().__new__(LicenserCSVDB) else: raise (Exception("Not a known backend")) return obj @property def authors(self): return None def add_author(self, author): pass @property def copyrights(self): return None @property def project(self): return self._configs['project'] @property def version_config(self): return 1 @property def versioning_backend(self): return self._configs['versioning_backend'] def get_config(self, conf): return self._configs[conf] if conf in self._configs else None def find_file(self, name): return None def update_license_file(self, filename, license_id, update=dt.now().date()): pass def is_author_in_ignore_list(self, author): return False + def list_of_commits_to_ignore(self): + return [] + @export class LicenserSQLITEDB(LicenserDB): # noinspection PyTypeChecker def __init__(self, filename): self.__connection = lite.connect(filename) self.__connection.row_factory = lite.Row self.__cursor = self.__connection.cursor() with self.__connection: self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'config' ''' + ''' ('property' TEXT, 'value' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors' ''' + ''' ('uid' TEXT NOT NULL, 'last_name' TEXT NOT NULL,''' + ''' 'first_name' TEXT NOT NULL,''' + ''' 'email' TEXT NOT NULL, 'real_email' TEXT DEFAULT '')''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors_ignore_list' ''' + ''' ('uid' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'copyrights' ''' + ''' ('lid' TEXT, 'from' TEXT, 'to' TEXT,''' + ''' 'on_modifications_txt', 'text' BLOB)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'files' ''' + ''' ('filename' TEXT, 'last_modif' TEXT, 'property' TEXT,''' + ''' 'value' TEXT,''' + ''' PRIMARY KEY('filename'))''') self.__cursor.execute('''SELECT * FROM 'config' AS c''') properties = self.__cursor.fetchall() for prop in properties: _property = prop['property'] self._configs[_property] = prop['value'] @property def authors(self): with self.__connection: self.__cursor.execute( '''SELECT a.'uid', a.'email', a.'last_name',''' + ''' a.'first_name', a.'real_email' ''' + ''' FROM 'authors' AS a''') authors = self.__cursor.fetchall() return [adb.Author( *[str(e) if e is not None else e for e in author]) for author in authors] def is_author_in_ignore_list(self, author): with self.__connection: self.__cursor.execute( '''SELECT a.'uid' ''' + ''' FROM 'authors_ignore_list' AS a''' + ''' WHERE a.'uid' = :uid''', {"uid": author.user_name}) authors = self.__cursor.fetchall() return len(authors) > 0 # noinspection PyTypeChecker def get_list_of_ignore_emails(self): with self.__connection: self.__cursor.execute( '''SELECT a.'email' ''' + ''' FROM 'authors_ignore_list' AS i, 'authors' AS a''' + ''' WHERE a.'uid' = i.'uid' ''') authors = self.__cursor.fetchall() - return [str(author['email']) for author in authors] + return [] + + def list_of_commits_to_ignore(self): + with self.__connection: + self.__cursor.execute( + '''SELECT c.'sha' ''' + + ''' FROM 'commits' AS c''' + + ''' WHERE c.'ignore' == 1''') + commits = self.__cursor.fetchall() + return [str(commit['sha']) for commit in commits] + return [] def add_author(self, author): with self.__connection: self.__cursor.execute( '''INSERT INTO 'authors' ('uid', 'first_name',''' + ''' 'last_name', 'email')''' + '''VALUES (:uid, :first_name, :last_name, :email)''', {'uid': str(author.user_name) if str(author.user_name) != 'None' else None, 'last_name': str(author.last_name) if str(author.last_name) != 'None' else None, 'first_name': str(author.first_name) if str(author.first_name) != 'None' else None, 'email': str(author.e_mail) if str(author.e_mail) != 'None' else None}) # noinspection PyTypeChecker @property def copyrights(self): with self.__connection: if self.version_config == 1: self.__cursor.execute( '''SELECT c.'lid', c.'on_modifications_txt',''' + ''' c.'from', c.'to', c.'text' ''' + ''' FROM 'copyrights' AS c''') copyrights = self.__cursor.fetchall() return [ CopyRight(copy['lid'], str(copy['on_modifications_txt']), dt.strptime(copy['from'], "%Y-%m-%d").date(), dt.strptime(copy['to'], "%Y-%m-%d").date(), (str(copy['text']) if not type(copy['text']) == bytes else copy['text'].decode('utf-8'))) for copy in copyrights] self.__cursor.execute( '''SELECT c.'lid', c.'license', c.'on_modifications_txt',''' + ''' c.'from', c.'to', c.'begin', c.'need_header', c.'text' ''' + ''' FROM 'copyrights' AS c''') copyrights = self.__cursor.fetchall() _copyrights = [] _default_copyright = '' if 'default_copyright' in self._configs: _default_copyright = self._configs['default_copyright'] if type(_default_copyright) == bytes: _default_copyright = _default_copyright.decode('utf-8') for _copyright in copyrights: _copyright_txt = _default_copyright if _copyright['text'] != '': _copyright_txt = _copyright['text'] _copyrights.append( CopyRight2( _copyright['lid'], _copyright['license'], str(_copyright['on_modifications_txt']), dt.strptime(_copyright['from'], "%Y-%m-%d").date(), dt.strptime(_copyright['to'], "%Y-%m-%d").date(), _copyright_txt.format(begin=_copyright['begin'], end=dt.today().year), bool(int(_copyright['need_header'])), (self._configs['license_header'] if 'license_header' in self._configs else ''))) return _copyrights # noinspection PyTypeChecker def find_file(self, filename): return self._get_file_property(filename, 'license_id') def _update_file_property(self, filename, property_name, value, update_time): with self.__connection: self.__cursor.execute( '''INSERT OR REPLACE INTO 'files' ('filename',''' + ''' 'last_modif', 'property', 'value') ''' + ''' VALUES (:filename, :update, :property, :value)''', {'filename': filename, 'property': property_name, 'value': value, 'update': update_time.strftime("%Y-%m-%d")}) # noinspection PyTypeChecker def _get_file_property(self, filename, property_name): with self.__connection: self.__cursor.execute( '''SELECT f.'last_modif', f.'value' ''' + ''' FROM 'files' AS f''' + ''' WHERE f.'filename'=:filename AND f.'property'=:property ''', {'filename': filename, 'property': property_name}) res = self.__cursor.fetchone() if res is not None: return [dt.strptime(res['last_modif'], "%Y-%m-%d"), res['value']] def update_license_file(self, filename, license_id, update=dt.now().date()): self._update_file_property(filename, 'license_id', license_id, update) def update_brief_file(self, filename, brief, update=dt.now().date()): self._update_file_property(filename, 'brief', brief, update) def update_keep_authors_file(self, filename, keep_authors, update=dt.now().date()): self._update_file_property( filename, 'authors_keep', ';'.join([auth.user_name for auth in keep_authors]), update) def get_last_brief(self, filename): brief = self._get_file_property(filename, 'brief') if brief is not None: return brief[1] def get_last_keep_authors(self, filename, author_db): res = self._get_file_property(filename, 'authors_keep') if res is not None: authors = res[1].split(";") keep_authors = [] for auth in authors: if len(auth) != 0: a = author_db.find_by_user_name(auth) keep_authors.append(a) return keep_authors @property def version_config(self): if 'version' in self._configs: return int(self._configs['version']) return 1 class LicenserCSVDB(LicenserDB): __author_file = '' __copyright_file = '' __treated_files = dict() # noinspection PyUnusedLocal def __init__(self, filename, **kwargs): self.__config_file = filename with open(self.__config_file) as fh: split_lines = [line.strip().split(';') for line in fh] if len(split_lines) > 1: raise (Exception("Too many lines in the configuration file")) self._configs['project'], self.__author_file, self.__copyright_file, self._configs['versioning_backend'] = [ token.strip('"') for token in split_lines[0] ] with open('treated_files.list', 'r') as file_: for line in file_: f, date, lid = line.split(";") f = f.strip() date = dt.strptime(date, "%Y-%m-%d") self.__treated_files[f] = [date, lid.strip()] @property def authors(self): with open(self.__author_file) as fh: next(fh) split_lines = [line.strip().split(';') for line in fh] return [adb.Author.make((token.strip('"') for token in line)) for line in split_lines] def add_author(self, author): with open(self.__author_file, 'a') as fh: fh.write( ';'.join(['\"{0}\"'.format(val) for val in author]) + '\n') def find_file(self, filename): if filename in self.__treated_files: return self.__treated_files[filename] return None def update_license_file(self, filename, license_id, update=dt.now().date()): self.__treated_files[filename] = [ update.strftime("%Y-%m-%d"), license_id] with open('treated_files.list', 'w') as f: f.write("\n".join([";".join(f) for f in self.__treated_files.items])) @property def copyrights(self): with open(self.__copyright_file) as fh: split_lines = [line.strip().split(';') for line in fh] tab = ([[token.strip('"') for token in line] for line in split_lines]) return [CopyRight._make( [lid, on_modif, dt.strptime(start, "%d-%m-%Y").date(), dt.strptime(end, "%d-%m-%Y").date(), "".join([line for line in open(license_file)])]) for lid, on_modif, start, end, license_file in tab] diff --git a/pylicenser/vcs/git.py b/pylicenser/vcs/git.py index 83a322a..a774da6 100644 --- a/pylicenser/vcs/git.py +++ b/pylicenser/vcs/git.py @@ -1,162 +1,163 @@ """ git.py: Backend for GIT""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" import git import binascii import datetime as dt import re from .. import export from .. import version_info @export class GITInfo(version_info.VersionInfo): """ git implementation of the version info class """ __repo = None __other_names = [] - def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs): - super().__init__(repo, filename, ignore_list, **kwargs) + def __init__(self, repo, filename, + authors_ignore_list=[], commits_ignore_list=[], + rev_to=None, **kwargs): + super().__init__(repo, filename, authors_ignore_list, **kwargs) self.__repo = git.Repo(self._repo) self._name = filename.replace(self._repo, '') + self._commits_ignore_list = commits_ignore_list + self._authors_ignore_list = commits_ignore_list - commits = self._list_commits(rev_to=rev_to, ignore_list=ignore_list) + commits = self._list_commits(rev_to=rev_to) self._modification_dates = [] for c in commits: - if c.author.email not in ignore_list: - self._modification_dates.append( - dt.datetime.fromtimestamp(c.authored_date)) + self._modification_dates.append( + dt.datetime.fromtimestamp(c.authored_date)) self.__other_names = self._list_names(commits) # noinspection PyIncorrectDocstring def populate(self, author_db, rev_from=None, rev_to=None): """ populate the internal variables """ - _commits = self._list_commits(rev_to=rev_to, - rev_from=rev_from) + _commits = self._list_commits(rev_to=rev_to, rev_from=rev_from) for c in _commits: email = c.author.email a = author_db.find_by_email(email) auth = a.user_name if auth not in self._author_list: self._author_list[auth] = [] if not author_db.is_author_in_ignore_list(a): self._authors.add(a) d = dt.datetime.fromtimestamp(c.authored_date) rev = c.hexsha msg = c.message self._author_list[auth].append((d, rev, msg)) def get_modifications(self, revision): hexsha = binascii.unhexlify(revision) commit = git.objects.commit.Commit(self.__repo, hexsha) stats = commit.stats.total patches = [] subproject_meta = re.compile( r'[-+]Subproject commit [0-9a-z]+$') header_modif = re.compile( r'''[-+] * ((@|__)author(__)?|brief|__license__|''' + r'''__credits__|@date|(C|__c)opyright\s+\(©\)) ''') - if len(commit.parents) == 2: #merge commit + if len(commit.parents) > 1: # merge commit print('Skipping merge commit') return '\n'.join(patches), stats for parent in commit.parents: diffs = commit.diff( parent, create_patch=True, ignore_space_change=True, paths=[self.__other_names]) patch = [ d.diff.decode('utf-8', errors="surrogateescape") for d in diffs ] if len(patch) > 0 and not re.match( subproject_meta, str(patch)) and not re.match(header_modif, str(patch)): patches.extend(patch) return '\n'.join(patches), stats - def _list_commits(self, rev_to=None, rev_from=None, ignore_list=None): - if ignore_list is None: - ignore_list = [] - + def _list_commits(self, rev_to=None, rev_from=None): _args = {'follow': True, # 'all': True, + 'no-merges': True, 'pretty': 'tformat:%H'} if rev_from is not None: _args['since'] = rev_from.strftime("%Y-%m-%d") if rev_to is not None: _args['until'] = rev_to.strftime("%Y-%m-%d") # git log can follow better renames in case there where badly done # (add/rm instead of mv) git_cmd = git.cmd.Git(working_dir=self.__repo.working_dir) str_c = git_cmd.log(self._name, **_args) if len(str_c) != 0: binhashes = (binascii.unhexlify(c) for c in str_c.split('\n')) else: binhashes = [] list_commits = [] + for b in binhashes: c = git.objects.commit.Commit(self.__repo, b) - if c.author.email not in ignore_list: + if ((c.author.email not in self._authors_ignore_list) and + (c.hexsha not in self._commits_ignore_list)): list_commits.append(c) - if len(list_commits) == 0: - del _args['follow'] - del _args['pretty'] - return list(self.__repo.iter_commits(paths=self._name, **_args)) + # if len(list_commits) == 0: + # del _args['follow'] + # del _args['pretty'] + # return list(self.__repo.iter_commits(paths=self._name, **_args)) - # list_commits = list(self.__repo.iter_commits(paths=self._name, **_args)) return list_commits def _list_names(self, commits): """ Finds all the names of a given file """ names = [self._name] for c in commits: for p in c.parents: diffs = c.diff(p, R=True, M=True) for d in diffs.iter_change_type('R'): if d.renamed and d.rename_to in names: new_path = d.rename_from self._names[new_path] = c.hexsha names.append(new_path) if len(self._names.keys()) == 0: c = commits[-1] self._names[self._name] = c.hexsha names.reverse() return names @property def oldest_name(self): return self.__other_names[0] diff --git a/pylicenser/vcs/svn.py b/pylicenser/vcs/svn.py index de0c10a..ab2ba6d 100644 --- a/pylicenser/vcs/svn.py +++ b/pylicenser/vcs/svn.py @@ -1,130 +1,131 @@ """ svn.py: Backend for SVN""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" """ Gather info for subversion based repos """ from .. import export from .. import print_colored from .. import version_info import svn import re import datetime as dt @export class SVNInfo(version_info.VersionInfo): """ subversion implementation of the version info class """ - def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs): - super().__init__(repo, filename, ignore_list, **kwargs) + def __init__(self, repo, filename, authors_ignore_list=[], + rev_to=None, **kwargs): + super().__init__(repo, filename, authors_ignore_list, **kwargs) self._client = pysvn.Client() self._root_url = self._client.root_url_from_path(repo) self._infos = self._client.info(self._filename) if rev_to is not None: rev_start = pysvn.Revision(pysvn.opt_revision_kind.date, rev_to.strftime('%s')) else: rev_start = pysvn.Revision(pysvn.opt_revision_kind.head) self._name = self._infos.url.replace(self._root_url, '') self._logs = self._client.log(self._filename, strict_node_history=False, discover_changed_paths=True, revision_start=rev_start) prev_name = self._name self._logs = sorted(self._logs, key=lambda log: log["revision"].number, reverse=True) for entry in self._logs: d = dt.datetime.fromtimestamp(entry["date"]) self._modification_dates.append(d) # checking if the name as changed old_names = [ ch_path for ch_path in entry["changed_paths"] if (re.match("{0}.*".format(ch_path["path"]), prev_name)) ] if len(old_names) > 0 and old_names[0]["copyfrom_path"] is not None: m = re.match("{0}(.*)".format(old_names[0]["path"]), prev_name) tmp_prev_name = prev_name prev_name = "{0}{1}".format(old_names[0]["copyfrom_path"], m.group(1) if m else "") self._names[old_names[0]["copyfrom_revision"].number] = tmp_prev_name self._names[0] = prev_name # print("\n".join(["{0} {1}".format(n, r) for r, n in self._names.items() ])) @property def oldest_name(self): return self._names[0] def populate(self, author_db, rev_from=None, rev_to=None): if rev_from is not None: rev_end = pysvn.Revision(pysvn.opt_revision_kind.date, rev_from.strftime('%s')) else: rev_end = pysvn.Revision(pysvn.opt_revision_kind.number, 0) if rev_to is not None: rev_start = pysvn.Revision(pysvn.opt_revision_kind.date, rev_to.strftime('%s')) else: rev_start = pysvn.Revision(pysvn.opt_revision_kind.head) self._logs = self._client.log(self._filename, strict_node_history=False, revision_start=rev_start, revision_end=rev_end) self._logs = sorted(self._logs, key=lambda log: log["revision"].number) for entry in self._logs: auth = entry["author"] rev = entry["revision"] d = dt.datetime.fromtimestamp(entry["date"]) msg = entry["message"] if "message" in entry else "" self._revisions.append(rev) if auth not in self._author_list: self._author_list[auth] = [] a = author_db.find_by_user_name(auth) if not author_db.is_author_in_ignore_list(a): self._authors.add(a) self._author_list[auth].append((d, rev, msg)) def __find_previous_release(self, r): revs = [ rev.number for rev in self._revisions if rev.number < r] rev = 0 if len(revs) == 0 else max(revs) return pysvn.Revision(pysvn.opt_revision_kind.number, rev) def get_modifications(self, revision): r_pre = pysvn.Revision(pysvn.opt_revision_kind.number, revision.number -1) # self.__find_previous_release(revision.number) relevant_rev = max([ n for n in self._names.keys() if revision.number > n ]) try: # print("{0} r{1}:{2}".format(self._root_url + self._names[relevant_rev], # r_pre.number, # revision.number)) res_diff = self._client.diff("/tmp", self._root_url + self._names[relevant_rev], revision1=r_pre, revision2=revision, diff_options=['-b', '-w']) stats = {'modifications': len(res_diff)} return res_diff,stats except Exception as e: pass return "" diff --git a/pylicenser/version_info.py b/pylicenser/version_info.py index 6f3ac6c..c60682c 100755 --- a/pylicenser/version_info.py +++ b/pylicenser/version_info.py @@ -1,97 +1,99 @@ # -*- coding: utf-8 -*- """ version_info.py: Base class for interaction with version control system""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" from . import print_colored from . import export import os @export class VersionInfo: """Generic class handling the communication with the versioning system""" # members _authors = set() _author_list = dict() _modification_dates = [] _name = "" _repo = "" _filename = "" _revisions = [] _names = dict() - def __new__(cls, repo, filename, ignore_list, backend=None, **kwargs): + def __new__(cls, repo, filename, authors_ignore_list=[], + backend=None, **kwargs): """ Factory constructor depending on the chosen backend """ if backend == 'svn': from .vcs import svn return super().__new__(svn.SVNInfo) elif backend == 'git': from .vcs import git return super().__new__(git.GITInfo) else: raise Exception("{0} not a known backend".format(backend)) # noinspection PyUnusedLocal - def __init__(self, repo, filename, ignore_list, **kwargs): + def __init__(self, repo, filename, authors_ignore_list=[], **kwargs): self._repo = os.path.expanduser(repo) self._filename = os.path.expanduser(filename) + self._authors_ignore_list = authors_ignore_list @property def creation_date(self): if len(self._modification_dates) > 0: return min(self._modification_dates) @property def last_modification_date(self): if len(self._modification_dates) > 0: return max(self._modification_dates) @property def authors(self): return self._authors def number_of_modifications(self, author): return len(self._author_list[author]) def modifications_by_author(self, author): auth = self._author_list[author] res = [] stats = {} for d, r, msg in auth: date = d.strftime("%d-%m-%Y") info = print_colored("@ {0} rev {1} msg {2}".format(date, r, msg), 'blue', attrs=['bold']) modif, _stats = self.get_modifications(r) for k, v in _stats.items(): if k in stats: stats[k] += v else: stats[k] = v res.append([info, modif, modif.split("\n"), _stats]) return res, stats @property def name(self): return self._name @property def oldest_name(self): return self._name def get_modifications(self, revision): pass