""" git.py: Backend for GIT"""

__author__ = "Guillaume Anciaux and Nicolas Richart"
__credits__ = [
    "Guillaume Anciaux ",
    "Nicolas Richart ",
]
__copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \
    " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \
    " en Mécanique des Solides)"
__license__ = "GPLv3"
__version__ = "2.0"

import git
import binascii
import datetime as dt
import re
from .. import export
from .. import version_info


@export
class GITInfo(version_info.VersionInfo):
    """
    git implementation of the version info class

    For one file of a git repository, gathers the commits touching it,
    the dates of those commits and the successive names of the file.
    """
    __repo = None
    __other_names = []

    def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs):
        """
        Open the repository and collect the history of ``filename``.

        :param repo: forwarded to the base class constructor; afterwards
            ``self._repo`` is used as the path given to ``git.Repo``
            (presumably set by the base class from this argument --
            TODO confirm)
        :param filename: path of the file; every occurrence of
            ``self._repo`` is stripped from it to build the name handed
            to git
        :param ignore_list: author e-mails whose commits are skipped
        :param rev_to: optional upper bound for the history; must offer
            ``strftime`` (see ``_list_commits``)
        :param kwargs: forwarded untouched to the base class constructor
        """
        super().__init__(repo, filename, ignore_list, **kwargs)

        self.__repo = git.Repo(self._repo)
        self._name = filename.replace(self._repo, '')

        commits = self._list_commits(rev_to=rev_to, ignore_list=ignore_list)

        # The filter is repeated here because the fallback path of
        # _list_commits returns commits without applying ignore_list.
        self._modification_dates = [
            dt.datetime.fromtimestamp(c.authored_date)
            for c in commits
            if c.author.email not in ignore_list
        ]

        self.__other_names = self._list_names(commits)

    # noinspection PyIncorrectDocstring
    def populate(self, author_db, rev_from=None, rev_to=None):
        """
        populate the internal variables

        Fills ``self._author_list`` (user name -> list of
        ``(date, hexsha, message)`` tuples) and ``self._authors`` from the
        commits found by ``_list_commits``.

        :param author_db: object providing ``find_by_email(email)`` and
            ``is_author_in_ignore_list(author)``
        :param rev_from: optional lower bound, forwarded to
            ``_list_commits``
        :param rev_to: optional upper bound, forwarded to ``_list_commits``
        """
        _commits = self._list_commits(rev_to=rev_to, rev_from=rev_from)

        for c in _commits:
            a = author_db.find_by_email(c.author.email)
            auth = a.user_name

            if auth not in self._author_list:
                self._author_list[auth] = []

            # Ignored authors still get their commits recorded below;
            # they are only kept out of the set of authors.
            if not author_db.is_author_in_ignore_list(a):
                self._authors.add(a)

            d = dt.datetime.fromtimestamp(c.authored_date)
            self._author_list[auth].append((d, c.hexsha, c.message))

    def get_modifications(self, revision):
        """
        Return the patches a commit applied to this file.

        :param revision: hexadecimal sha of the commit
        :return: tuple ``(patch_text, stats)`` where ``patch_text`` is the
            patches joined with newlines (empty for a commit with exactly
            two parents) and ``stats`` is ``commit.stats.total``
        """
        hexsha = binascii.unhexlify(revision)
        commit = git.objects.commit.Commit(self.__repo, hexsha)
        stats = commit.stats.total

        patches = []
        subproject_meta = re.compile(
            r'[-+]Subproject commit [0-9a-z]+$')
        header_modif = re.compile(
            r'''[-+] * ((@|__)author(__)?|brief|__license__|''' +
            r'''__credits__|@date|(C|__c)opyright\s+\(©\)) ''')

        if len(commit.parents) == 2:
            # merge commit
            print('Skipping merge commit')
            return '\n'.join(patches), stats

        for parent in commit.parents:
            # The list of names is passed directly: ``paths`` accepts a
            # list of paths, so there is no need to nest it in another
            # list.
            diffs = commit.diff(
                parent,
                create_patch=True,
                ignore_space_change=True,
                paths=self.__other_names)

            patch = [
                d.diff.decode('utf-8', errors="surrogateescape")
                for d in diffs
            ]

            # NOTE(review): str(patch) is the repr of a list, so it starts
            # with '[' while re.match anchors at position 0 and both
            # patterns start with [-+]; these two filters therefore look
            # like they can never reject a patch. Behaviour is kept as is
            # until the intended filtering is confirmed.
            if patch and not re.match(
                    subproject_meta, str(patch)) and not re.match(
                        header_modif, str(patch)):
                patches.extend(patch)

        return '\n'.join(patches), stats

    def _list_commits(self, rev_to=None, rev_from=None, ignore_list=None):
        """
        List the commits touching the file.

        ``git log --follow`` is tried first; when it yields no commit
        (after removing ignored authors) ``Repo.iter_commits`` is used
        instead, and in that case ``ignore_list`` is not applied.

        :param rev_to: optional upper bound, formatted with
            ``strftime("%Y-%m-%d")`` and passed as ``until``
        :param rev_from: optional lower bound, formatted the same way and
            passed as ``since``
        :param ignore_list: author e-mails to skip; ``None`` means none
        :return: list of ``git.objects.commit.Commit``
        """
        if ignore_list is None:
            ignore_list = []

        _args = {'follow': True,
                 # 'all': True,  # disabled: --all is not passed to git log
                 'pretty': 'tformat:%H'}

        if rev_from is not None:
            _args['since'] = rev_from.strftime("%Y-%m-%d")

        if rev_to is not None:
            _args['until'] = rev_to.strftime("%Y-%m-%d")

        # git log can better follow renames in case they were badly done
        # (add/rm instead of mv)
        git_cmd = git.cmd.Git(working_dir=self.__repo.working_dir)
        log_output = git_cmd.log(self._name, **_args)

        # One full hash per line because of the 'tformat:%H' format.
        hexshas = log_output.split('\n') if log_output else []

        list_commits = []
        for hexsha in hexshas:
            c = git.objects.commit.Commit(
                self.__repo, binascii.unhexlify(hexsha))
            if c.author.email not in ignore_list:
                list_commits.append(c)

        if not list_commits:
            # These two options are only meant for the raw `git log` call
            # above, so they are dropped before calling iter_commits.
            del _args['follow']
            del _args['pretty']
            return list(self.__repo.iter_commits(paths=self._name, **_args))

        return list_commits

    def _list_names(self, commits):
        """
        Finds all the names of a given file

        Walks the rename entries of ``commits`` and records in
        ``self._names`` the commit in which each previous name was found.

        :param commits: list of commits, as returned by ``_list_commits``
        :return: list of names in reverse order of discovery, so that the
            current name ``self._name`` comes last
        """
        names = [self._name]

        for c in commits:
            for p in c.parents:
                diffs = c.diff(p, R=True, M=True)
                for d in diffs.iter_change_type('R'):
                    if d.renamed and d.rename_to in names:
                        new_path = d.rename_from
                        self._names[new_path] = c.hexsha
                        names.append(new_path)

        # No rename was found: register the current name with the last
        # commit of the list. An empty commit list is skipped instead of
        # raising IndexError on commits[-1].
        if commits and len(self._names.keys()) == 0:
            self._names[self._name] = commits[-1].hexsha

        names.reverse()
        return names

    @property
    def oldest_name(self):
        """ First entry of the list of names built by ``_list_names`` """
        return self.__other_names[0]