diff --git a/pylicenser/author_db.py b/pylicenser/author_db.py index 896ef32..de5031c 100755 --- a/pylicenser/author_db.py +++ b/pylicenser/author_db.py @@ -1,161 +1,167 @@ """ author_db.py: Handling authors""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" from . import export, licenser_ask_question from builtins import property as _property, tuple as _tuple from operator import itemgetter as _itemgetter from collections import OrderedDict @export class Author(tuple): """Author(user_name, e_mail, last_name, first_name, real_email)""" __slots__ = () _fields = ('user_name', 'e_mail', 'last_name', 'first_name', 'real_email') # noinspection PyInitNewSignature def __new__(cls, user_name, e_mail, last_name, first_name, real_email): """Create new instance of Author(user_name, e_mail, last_name, first_name, real_email)""" return _tuple.__new__(cls, (user_name, e_mail, last_name, first_name, real_email)) # noinspection PyIncorrectDocstring @classmethod def _make(cls, iterable, new=tuple.__new__, len=len): """Make a new Author object from a sequence or iterable""" result = new(cls, iterable) if len(result) != 5: raise TypeError('Expected 5 arguments, got %d' % len(result)) return result # noinspection PyArgumentList @classmethod def make(cls, iterable, new=tuple.__new__, len=len): cls._make(cls, iterable, new=new, len=len) # noinspection PyMethodParameters def _replace(_self, **kwds): """Return a new Author object replacing specified fields with new values""" result = _self._make(map(kwds.pop, ('user_name', 'e_mail', 'last_name', 'first_name', 'real_email'), _self)) if kwds: raise ValueError('Got unexpected field names: %r' % list(kwds)) return result def __repr__(self): # noinspection PyTypeChecker 
if self.real_email is not None and len(self.real_email) > 0: email = self.real_email else: email = self.e_mail - return "{0} {1} <{2}>".format(self.first_name, self.last_name, email) + return "{0} {1}{2}".format( + self.first_name, self.last_name, # drop the e-mail part if unknown + " <{}>".format(email) if email and email != "None" else "") def __eq__(self, other): return self.user_name == other.user_name def __hash__(self): return hash(self.user_name) def _asdict(self): """Return a new OrderedDict which maps field names to their values.""" return OrderedDict(zip(self._fields, self)) def __getnewargs__(self): """Return self as a plain tuple. Used by copy and pickle.""" return tuple(self) user_name = _property(_itemgetter(0), doc='Alias for field number 0') e_mail = _property(_itemgetter(1), doc='Alias for field number 1') last_name = _property(_itemgetter(2), doc='Alias for field number 2') first_name = _property(_itemgetter(3), doc='Alias for field number 3') real_email = _property(_itemgetter(4), doc='Alias for field number 4') @export class AuthorDB: def __init__(self, db): self.__db = db self._authors = db.authors def __add_missing_author(self, entry): print( 'The author identified by the key \'{0}\' is not know please' ' enter the information for this author.'.format(entry)) valid_entry = False auth = {'first_name': {'str': 'First name', 'value': None}, 'last_name': {'str': 'Last name', 'value': None}, 'e_mail': {'str': 'E-Mail', 'value': None}, 'user_name': {'str': 'Username', 'value': None} } for key, value in entry.items(): auth[key]['value'] = value while not valid_entry: for key, value in auth.items(): if key not in entry: answer = input("{str} [{value}]: ".format(**value)) value['value'] = answer if not answer == '' \ else value['value'] print('\n{}'.format( '\n'.join(['{str}: {value}'.format(**val) for _, val in auth.items()]))) valid_entry = licenser_ask_question("Are this information correct") if valid_entry: + for val in auth.values(): # inner dicts, not the key strings + if val['value'] is None: + val['value'] = 'None' + author =
Author(auth['user_name']['value'], auth['e_mail']['value'], auth['last_name']['value'], auth['first_name']['value'], '') self.__db.add_author(author) self._authors.append(author) return author def is_author_in_ignore_list(self, author): return self.__db.is_author_in_ignore_list(author) def find_by_name(self, first_name, last_name): find = [author for author in self._authors if ((author.first_name == first_name) and (author.last_name == last_name))] if len(find) == 0: find.append(self.__add_missing_author({'first_name': first_name, 'last_name': last_name})) return find[0] def find_by_user_name(self, user_name): find = [author for author in self._authors if author.user_name == user_name] if len(find) == 0: find.append(self.__add_missing_author({'user_name': user_name})) return find[0] def find_by_email(self, e_mail): find = [author for author in self._authors if author.e_mail == e_mail] if len(find) == 0: find.append(self.__add_missing_author({'e_mail': e_mail})) return find[0] diff --git a/pylicenser/file_info.py b/pylicenser/file_info.py index fd6a333..a160294 100755 --- a/pylicenser/file_info.py +++ b/pylicenser/file_info.py @@ -1,397 +1,397 @@ # -*- coding: utf-8 -*- """ file_info.py: Per file type specialisation of the code""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" import os import re import jinja2 import copy from . 
import export UNKNOWN_TYPE = 0 CPP_SOURCES = 1 CMAKE_FILES = 2 SWIG_FILES = 3 PYTHON_SCRIPT = 4 CREATION_MODIFICATION = 0 LAST_MODIFICATION = 1 CREATION = 2 @export class FileInfo: __date_style_conv = {'creation_modification': CREATION_MODIFICATION, 'last_modification': LAST_MODIFICATION, 'creation': CREATION} __supported_ext = [] __supported_types = { CPP_SOURCES: [".cc", ".c", ".hh", ".h"], CMAKE_FILES: [".txt", ".cmake"], SWIG_FILES: [".i"], PYTHON_SCRIPT: [".py"]} _file_type = UNKNOWN_TYPE _file_content = "" _warn_list = list() _brief = "" _sections = list() _authors = set() _header = "" def __init__(self, filename, author_db): self._warn_list = list() self._sections = list() self._authors = set() self._author_db = author_db self._filename = os.path.expanduser(filename) garbage, self._ext = os.path.splitext(os.path.basename(filename)) if self._ext == '.in': self._ext = os.path.splitext(garbage)[1] for key, value in self.__supported_types.items(): self.__supported_ext.extend(value) if self._ext not in self.__supported_ext: raise NotImplementedError( "File {0} has been skipped based on its extension".format( self._filename)) for key, value in self.__supported_types.items(): if self._ext in value: self._file_type = key self.__analyse_content() def __analyse_content(self): with open(self._filename) as f: self._file_content = f.read() try: self.__split_header() except Exception as error: raise Exception( "While dealing with file {0}:\n{1}".format(self._filename, error)) self.__find_authors() self.__find_brief() self.__save_sections() if self._warn_list: print("\nWARNING!:") print(" While dealing with file {0}".format(self._filename)) print(" the following shit happened:") [print(" {0}".format(warning)) for warning in self._warn_list] print("{0}:1:".format(self._filename)) def __save_sections(self): reg = re.compile('^(.*@section)', re.MULTILINE) pref = re.search(reg, self._header) pref = pref.group(1) if pref is not None else "" reg = re.compile('^.*@section', 
re.MULTILINE) licreg = re.compile('LICEN.E') for section in re.split(reg, self._header)[1:]: if not re.search(licreg, section.split('\n')[0]): self._sections.append(pref + section) def __split_header(self): self.__pre_header = "" - if not re.search("Copyright", self._file_content): + if not re.search("(section LICENSE|[Cc]opyright)", self._file_content): self._header = "" self._body = self._file_content print("This file as no header or a really badly formatted one!!") else: if self._file_type in [CPP_SOURCES, SWIG_FILES]: self._header, self._body = re.split( r"\*/", self._file_content, maxsplit=1) self._header = '\n'.join( (line for line in self._header.split('\n') if line.strip())) elif self._file_type == CMAKE_FILES: lines = self._file_content.split('\n') nb_sep = 0 nb_whitelines = 0 header_lines = list() while nb_sep < 2: if lines[0].strip().startswith('#===='): nb_sep += 1 elif not lines[0].strip(): nb_whitelines += 1 header_lines.append(lines.pop(0)) if nb_whitelines > 0: self._warn_list.append( "The header has a blank line or is ill-formed") self._header = "\n".join(header_lines) self._body = "\n".join(lines) elif self._file_type == PYTHON_SCRIPT: _pre_header_re = re.compile(r"^(#.*\n)*", re.MULTILINE) matches = _pre_header_re.match(self._file_content) if matches: self.__pre_header = matches.group().strip() self._body = copy.copy(self._file_content) self._body = self._body.replace(self.__pre_header, '').strip() self._header = '' reg_doc_string = re.compile( r"""(?P["']{3}""" + r"""(?:\s*(?P.*?):\s*)""" + r"""(?P(?:.|\n)*?)["']{3})?\n*""", re.MULTILINE) match = reg_doc_string.search(self._body) if match: self._header = self._header + match.group(0) reg_key_value = re.compile( r"""(?P(?P""" + r"""__(?:license|author|credits|copyright)__)""" + # NOQA: E501, keywords r"""\s*=(?P(?:(?P\s*\[\n?)?""" + # NOQA: E501, opening braces r"""(?:\s*".*"\s*(?:\\|,)?\n)+""" + # NOQA: E501, value, multiline or comma separated r"""(?(brace)\s*\]\n))?))\n*""", # NOQA: E501, 
closing braces if opening present re.MULTILINE) for match in re.finditer(reg_key_value, self._body): self._header = self._header + match.group(0) self._body = self._body.replace(self._header, '') def __find_authors(self): + def _add_authors(reg, names): + for match in re.finditer(reg, names): + if match.group('email') and match.group('email') != 'None': + author = self._author_db.find_by_email( + match.group('email')) + else: + author = self._author_db.find_by_name( + match.group('firstname'), + match.group('lastname')) + + if not self._author_db.is_author_in_ignore_list(author): + self._authors.add(author) + if self._file_type == PYTHON_SCRIPT: reg = re.compile( r"__credits__ = \[(?P(?:\s*\"(.*?)\",?\n?)*)\]") # NOQA(E501) matches = reg.search(self._header) if matches: names = matches.group('names').strip() reg = re.compile( - r"\"(?P\w+)\s*(?P\w+)(?:\s*<(?P.*?)>)?\"") # NOQA(E501) - for match in re.finditer(reg, names): - if match.group('email'): - author = self._author_db.find_by_email( - match.group('email')) - else: - author = self._author_db.find_by_name( - match.group('firstname'), - match.group('lastname')) - - if not self._author_db.is_author_in_ignore_list(author): - self._authors.add(author) - - reg = re.compile( - r'@author\s+(.*)\s\w+)\s*(?P\w+?)(?:\s*<(?P.*?)>)?\"") # NOQA(E501) + _add_authors(reg, names) + else: + reg = re.compile( + r'@author\s+(?P\w+)(?:\w|\s)*\s+(?P\w+)(?:\s+<(?P.+@.+\..+)>)?', # NOQA(E501) + re.IGNORECASE) + _add_authors(reg, self._header) def __find_brief(self): if self._file_type == PYTHON_SCRIPT: _brief_re = re.compile( r"(?P[\"']{3}(?:\s*(?P.*?):\s*)(?P.*?)[\"']{3})", # NOQA(E501) re.MULTILINE | re.DOTALL) matches = _brief_re.match(self._header) if matches: self._brief = ' '.join( [line.strip() for line in matches.group('brief').split('\n')]) else: self._brief = "" return if self._file_type in [CPP_SOURCES, SWIG_FILES]: prefix = " *" elif self._file_type == CMAKE_FILES: prefix = "#" else: raise IOError("file type unknown") 
reg = re.compile( '@brief(.*?)^ ?\S\s+(@|Copyright)', re.MULTILINE | re.DOTALL) matches = [match for match in re.finditer(reg, self._header)] if len(matches) == 0: self._brief = "" return if len(matches) > 1: self._warn_list.append("There's multiple @brief") self._brief = "\n".join((line.replace(prefix, "").strip() for line in matches[0].group(1).split('\n') if len(line.strip()) > 1)) def generate_header_2(self, **kwargs): args = copy.copy(kwargs) args['filename'] = os.path.basename(self._filename) _authors_list = [author for author in args['authors']] _authors_list.sort(key=lambda a: a.last_name) args['authors'] = _authors_list args['pre_header'] = self.__pre_header args['sections'] = self._sections full_author = ', '.join(["{0.first_name} {0.last_name}".format(author) for author in _authors_list[:-1]]) if len(_authors_list) > 1: full_author = full_author + " and " + \ "{0.first_name} {0.last_name}".format(_authors_list[-1]) else: full_author = "{0.first_name} {0.last_name}".format( _authors_list[0]) args['full_authors'] = full_author if self._file_type in [CPP_SOURCES, SWIG_FILES]: template_file = 'cpp.j2' elif self._file_type == CMAKE_FILES: template_file = 'cmake.j2' elif self._file_type == PYTHON_SCRIPT: template_file = 'python.j2' env = jinja2.Environment( loader=jinja2.FileSystemLoader('pylicenser/header_templates/'), trim_blocks=True, undefined=jinja2.DebugUndefined ) env.globals['CREATION_MODIFICATION'] = CREATION_MODIFICATION env.globals['LAST_MODIFICATION'] = LAST_MODIFICATION env.globals['CREATION'] = CREATION template = env.get_template(template_file) return template.render(args) def generate_header(self, real_authors=None, copyright_txt="", creation_date=None, last_modification_date=None, date_style=CREATION_MODIFICATION, date_format="%a %b %d %H:%M:%S %Y", brief=None): if type(date_style) == str: # noinspection PyTypeChecker date_style = self.__date_style_conv[date_style] if self._file_type in [CPP_SOURCES, SWIG_FILES]: starter = "/**" prefix = " *" 
closer = " */" force_closer = True elif self._file_type == CMAKE_FILES: starter = "#===============================================================================" # NOQA(E501) prefix = "#" closer = "#===============================================================================" # NOQA(E501) force_closer = False elif self._file_type == PYTHON_SCRIPT: raise IOError( "Python script are not supported with this version of config") else: raise IOError("File type not supported") if real_authors is None: real_authors = self._authors new_lines = list() new_lines.append(starter) # file name new_lines.append(prefix + " @file " + os.path.basename(self._filename)) new_lines.append(prefix) # authors authors = list(real_authors) authors.sort(key=lambda x: x.last_name + ' ' + x.first_name) for author in authors: new_lines.append('{0} @author {1}'.format(prefix, author)) new_lines.append(prefix) # date if ((date_style == CREATION_MODIFICATION) and (creation_date is not None) and (last_modification_date is not None)): new_lines.append( "{0} @date creation: {1}".format( prefix, creation_date.strftime(date_format))) if last_modification_date != creation_date: new_lines.append( "{0} @date last modification: {1}".format( prefix, last_modification_date.strftime(date_format))) elif ((date_style == LAST_MODIFICATION) and (last_modification_date is not None)): new_lines.append("{0} @date {1}".format( prefix, last_modification_date.strftime(date_format))) elif ((date_style == CREATION) and (creation_date is not None)): new_lines.append("{0} @date {1}".format( prefix, creation_date.strftime(date_format))) new_lines.append(prefix) # brief brief_list = (self._brief if brief is None else brief).split("\n") pref_join = ("\n{0} ".format(prefix)) new_lines.append( '{0} @brief {1}'.format( prefix, pref_join.join([b for b in brief_list]))) new_lines.append(prefix) # license new_lines.append(prefix + " @section LICENSE") new_lines.append(prefix) for line in copyright_txt.split("\n"): 
new_lines.append('{0}{1}{2}'.format( prefix, " " if line else "", line)) new_lines.append(prefix) # sections for section in self._sections: new_lines += section.split('\n') if force_closer or not self._sections: new_lines.append(closer) return '\n'.join(new_lines) def get_brief(self): return self._brief def replace_file(self, new_header=None): if new_header is None: new_header = self._header body = self._body.split("\n") while body and not body[0].strip(): body.pop(0) new_file = (new_header + '\n\n' + '\n'.join(body)) if self._file_type == PYTHON_SCRIPT: new_file = new_file + '\n' # ensure new line at the end W292 with open(self._filename, "w") as fh: print(new_file, file=fh, end='') @property def authors(self): return self._authors diff --git a/pylicenser/file_transformer.py b/pylicenser/file_transformer.py index cf69bc3..9f20650 100755 --- a/pylicenser/file_transformer.py +++ b/pylicenser/file_transformer.py @@ -1,359 +1,359 @@ # -*- coding: utf-8 -*- """ file_transformer.py: Coordination of the different transformations""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" import datetime as dt from . import licenser_ask_question from . import author_db as adb from . import copyright_db as cdb from . import export from . import file_info as fi from . import print_colored from . 
import version_info as vc import os from pygments import highlight from pygments.lexers.diff import DiffLexer from pygments.formatters.terminal256 import Terminal256Formatter from pygments.formatters.terminal import TerminalFormatter @export class FileTransformer(object): """ Class that reformat the headers """ __keep_authors = None _brief = None __ignore = False _new_header = None def __init__(self, filename, db, release_date=dt.datetime.now().date(), no_author_check=False, no_brief_check=False, force=False, version=None, **kwargs): self.__filename = filename self.__release_date = release_date self.__db = db self.__no_brief_check = no_brief_check self.__no_author_check = no_author_check self.__version = version # Check the authors self.__author_db = adb.AuthorDB(db) if "vc_backend" in kwargs and kwargs["vc_backend"] is not None: vc_back = kwargs["vc_backend"] else: vc_back = db.versioning_backend if vc_back != "none": self._date_style = self.__db.get_config('date_style') self.__repo = self.__db.get_config('repo') self._vc_info = vc.VersionInfo( self.__repo, self.__filename, self.__db.get_list_of_ignore_emails(), backend=vc_back, rev_to=release_date) self.__name = self._vc_info.name self.__filename = self.__repo + '/' + self.__name self._creation_date = self._vc_info.creation_date self._last_modif = self._vc_info.last_modification_date else: self._creation_date = None self._last_modif = None self._date_style = None self._vc_info = None try: self._file = fi.FileInfo(self.__filename, self.__author_db) except NotImplementedError: print("File {0} ignored due to {1}".format( self._vc_info.name, print_colored('unknown type', 'red', attrs=['bold']))) self.__ignore = True return rev_from = None license_id = None self.__oldest_name = "" if vc_back != "none": self.__oldest_name = self._vc_info.oldest_name file_maj = self.__db.find_file(self.__oldest_name) if file_maj is not None: rev_from, license_id = file_maj if not force and rev_from.date() >= release_date: print(("File 
{0} ignored due to recent " + "modifications ({1})").format( self._vc_info.name, print_colored( rev_from.strftime("%Y-%m-%d"), 'red', attrs=['bold']))) self.__ignore = True return if not self.__no_author_check: self._vc_authors = self._vc_info.authors self._vc_info.populate(self.__author_db, rev_to=self.__release_date, rev_from=rev_from) self._vc_authors = self._vc_info.authors self._f_authors = self._file.authors self._real_authors = self.__compare_authors(self._f_authors, self._vc_authors, file_is_good=False, **kwargs) if self._real_authors - self._f_authors: print("Added authors:\n{0}".format( "\n".join([ " @author {0}".format(author) for author in self._real_authors - self._f_authors ]))) if self._f_authors - self._real_authors: print( "Removed authors:\n{0}".format( "\n".join([ " @author {0}".format(author) for author in self._f_authors - self._real_authors ]))) else: self._real_authors = self._file.authors if len(self._real_authors) == 0: res = input("This file as no author please provide " + "the username of the authors: ") while res != "": self._real_authors.add(self.__author_db.find_by_user_name(res)) res = input("> ") # Check the brief if not self.__no_brief_check: self.__check_brief(**kwargs) else: self._brief = self._file.get_brief() if "force_license" in kwargs and kwargs['force_license'] is not None: license_id = kwargs["force_license"] # Getting the license content self.__copyright_base = cdb.CopyrightDB(db) if license_id is None: copyright_policy = self.__db.get_config('copyright_policy') if copyright_policy == 'creation_date': cdate = self._creation_date.date() elif copyright_policy == 'release_date': cdate = self.__release_date else: cdate = None self._lic = self.__copyright_base.find_by_date(cdate) else: self._lic = self.__copyright_base.find_by_id(license_id) if self._lic is None: raise ("The license with the id {0} is not defined".format( license_id)) date_format = self.__db.get_config('date_format') version_config = 
self.__db.get_config('date_format') # Generates the new header file _header_params = { 'authors': self._real_authors, 'copyright_txt': self._lic.text, 'date_format': date_format, 'date_style': self._date_style, 'last_modification_date': self._last_modif, 'creation_date': self._creation_date, 'brief': self._brief, 'version': self.__version } if version_config == 1: self._new_header = self._file.generate_header( **_header_params) else: self._new_header = self._file.generate_header_2( license_header=self._lic.header, need_header=self._lic.need_header, license=self._lic.license, **_header_params ) # noinspection PyPep8Naming,PyUnusedLocal def __compare_authors(self, file_known_tmp, vc_known, file_is_good=False, ignore_threshold=0, yes=False, **kwargs): file_known = file_known_tmp if len(file_known) == 0 and len(vc_known) == 1: file_known = vc_known if not file_is_good: last_keep = None if len(self.__oldest_name) != 0: last_keep = self.__db.get_last_keep_authors( self.__oldest_name, self.__author_db) if last_keep is None: last_keep = [] for author in last_keep: print("Keeping @author {0} ({1})".format( author, print_colored("previously validated", 'red', attrs=['bold']))) file_only = file_known - vc_known - set(last_keep) to_remove = set() if file_only: KEEP = 0 REMOVE = 1 for author in file_only: if not yes: answer = licenser_ask_question( "Do you want to remove @author {0}".format(author), {'k': KEEP, 'r': REMOVE}, 'k') if answer == REMOVE: to_remove.add(author) else: print(("Do you want to remove " + "@author {0}? 
({1}/r)").format( author, print_colored('K', 'red', attrs=['bold']) )) file_known = file_known - to_remove - self.__keep_authors = file_known - to_remove - vc_known + self.__keep_authors = file_known - to_remove vc_only = vc_known - file_known if vc_only: to_add = set() YES = 0 NO = 1 DIFF = 2 for author in vc_only: modifications, stats = self._vc_info.modifications_by_author( author.user_name) number_of_modified_lines = sum( [len(e[2]) for e in modifications]) number_of_modifications = len(modifications) if ((number_of_modified_lines <= ignore_threshold) or (number_of_modified_lines == number_of_modifications)): print('Potential new author' + ' @author {0} '.format(author) + '({0} modifications, lines count {1}) {2}'.format( number_of_modifications, number_of_modified_lines, print_colored( "[ignored do to threshold {0}]".format( ignore_threshold), 'red', attrs=['bold']))) continue answer = DIFF while answer == DIFF: answer = licenser_ask_question( 'Do you want to add' + ' @author {0}'.format(print_colored(author, "blue")) + ' ({0} modifications, lines count {1}) '.format( number_of_modifications, number_of_modified_lines), {'d': DIFF, 'n': NO, 'y': YES}, 'n') if answer == DIFF: for info, modif, nb_lines, diff_stats in modifications: if len(modif) > 2: print(info) formatter = ( Terminal256Formatter if '256color' in os.environ.get('TERM', '') else TerminalFormatter) print(highlight(modif, DiffLexer(), formatter())) elif answer == YES: to_add.add(author) file_known = file_known.union(to_add) return file_known # noinspection PyUnusedLocal def __check_brief(self, ignore_filled_briefs=False, yes=False, **kwargs): self._brief = self._file.get_brief() if self._brief and not ignore_filled_briefs: if len(self.__oldest_name) != 0: old_brief = self.__db.get_last_brief(self.__oldest_name) else: old_brief = None if old_brief is not None and old_brief == self._brief: print('Brief: {0} ({1})'.format( self._brief, print_colored("previously validated", 'red', attrs=['bold']))) 
else: if not yes: keep_brief = licenser_ask_question( ("\"{0}\"\n" + "Do you want to keep this brief").format(self._brief)) if not keep_brief: self._brief = False else: print(("\"{0}\"\nDo you want to keep" + " this brief ? ({1}/n)").format( self._brief, print_colored('Y', 'red', attrs=['bold']))) if not self._brief: res = input("Please type the brief for file: ") brief = [] while res != "": brief.append(res) res = input("> ") self._brief = "\n".join(brief) def replace_file(self, dry_run=True): if self.__ignore: return if dry_run: print(self._new_header) else: if self._vc_info is not None: self.__db.update_license_file(self.__oldest_name, self._lic.lid, self.__release_date) if self._brief is not None and not self.__no_brief_check: self.__db.update_brief_file(self.__oldest_name, self._brief, self.__release_date) if ((self.__keep_authors is not None) and (not self.__no_author_check)): self.__db.update_keep_authors_file(self.__oldest_name, self.__keep_authors, self.__release_date) self._file.replace_file(self._new_header) diff --git a/pylicenser/header_templates/cmake.j2 b/pylicenser/header_templates/cmake.j2 index 464915d..178c3f3 100644 --- a/pylicenser/header_templates/cmake.j2 +++ b/pylicenser/header_templates/cmake.j2 @@ -1,41 +1,41 @@ #=============================================================================== # @file {{filename}} {% if authors %} # {% for author in authors %} # @author {{author}} {% endfor %} {% endif %} {% if date_style == 'creation_modification' and creation_date and last_modification_date %} # # @date creation: {{creation_date.strftime(date_format)}} # @date last modification: {{last_modification_date.strftime(date_format)}} {% elif date_style == 'last_modification' and last_modification_date %} # # @date: {{creation_date.strftime(date_format)}} {% elif date_style == 'creation' and creation_date %} # # @date: {{creation_date.strftime(date_format)}} {% endif %} {%- if brief %} # -# @brief {{ brief }} +# @brief {{ brief | wordwrap(78) | 
replace('\n', '\n# ') }} {% endif %} {% if copyright_txt %} # # # @section LICENSE # # {{ copyright_txt | wordwrap(78) | replace('\n', '\n# ') }} {% endif %} {% if license_header %} # # {{ license_header | wordwrap(78) | replace('\n', '\n# ') }} {% endif %} # {% for section in sections %} {{ section }} {% endfor %} {% if not sections %} #=============================================================================== {% endif %} diff --git a/pylicenser/header_templates/cpp.j2 b/pylicenser/header_templates/cpp.j2 index d200c76..c9e485d 100644 --- a/pylicenser/header_templates/cpp.j2 +++ b/pylicenser/header_templates/cpp.j2 @@ -1,39 +1,39 @@ /** * @file {{filename}} {% if authors %} * {% for author in authors %} * @author {{author}} {% endfor %} {% endif %} {% if date_style == 'creation_modification' and creation_date and last_modification_date %} * * @date creation: {{creation_date.strftime(date_format)}} * @date last modification: {{last_modification_date.strftime(date_format)}} {% elif date_style == 'last_modification' and last_modification_date %} * * @date: {{creation_date.strftime(date_format)}} {% elif date_style == 'creation' and creation_date %} * * @date: {{creation_date.strftime(date_format)}} {% endif %} {%- if brief %} * - * @brief {{ brief }} + * @brief {{ brief | wordwrap(77) | replace('\n', '\n * ') }} {% endif %} {% if copyright_txt %} * * * @section LICENSE * * {{ copyright_txt | wordwrap(77) | replace('\n', '\n * ') }} {% endif %} -{% if license_header %} +{% if license_header and need_header %} * * {{ license_header | wordwrap(77) | replace('\n', '\n * ') }} {% endif %} * {% for section in sections %} {{ section }} {% endfor %} */ diff --git a/pylicenser/licenser_db.py b/pylicenser/licenser_db.py index 7d39738..a0599e0 100755 --- a/pylicenser/licenser_db.py +++ b/pylicenser/licenser_db.py @@ -1,375 +1,379 @@ # -*- coding: utf-8 -*- """ licenser_db.py: Interaction with the configuration DB (SQLite or CSV)""" __author__ = "Guillaume Anciaux and 
Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" from . import export from . import author_db as adb import sqlite3 as lite from collections import namedtuple from datetime import datetime as dt import os CopyRight = namedtuple('CopyRight', ['lid', 'on_modifications_txt', 'start', 'end', 'text']) CopyRight2 = namedtuple('CopyRight2', ['lid', 'license', 'on_modifications_txt', 'start', 'end', 'text', 'need_header', 'header']) SQLITE = 0 CSV = 1 @export class LicenserDB: _project = "" _versioning_backend = "svn" _configs = { 'project': "", 'versioning_backend': 'svn', 'date_format': "%a %b %d %H:%M:%S %Y", 'date_style': 'last_modification', 'copyright_policy': 'creation_date' } def __new__(cls, filename, backend=None): """ Factory constructor depending on the chosen backend """ if backend is None: _filepath = os.path.expanduser(filename) garbage, ext = os.path.splitext(os.path.basename(_filepath)) if ext == ".csv": backend = CSV elif ext == ".db": backend = SQLITE else: raise (Exception("Unknown file format")) if backend == SQLITE: obj = super().__new__(LicenserSQLITEDB) elif backend == CSV: obj = super().__new__(LicenserCSVDB) else: raise (Exception("Not a known backend")) return obj @property def authors(self): return None def add_author(self, author): pass @property def copyrights(self): return None @property def project(self): return self._configs['project'] @property def version_config(self): return 1 @property def versioning_backend(self): return self._configs['versioning_backend'] def get_config(self, conf): return self._configs[conf] if conf in self._configs else None def find_file(self, name): return None def update_license_file(self, filename, license_id, update=dt.now().date()): pass def is_author_in_ignore_list(self, 
author): return False @export class LicenserSQLITEDB(LicenserDB): # noinspection PyTypeChecker def __init__(self, filename): self.__connection = lite.connect(filename) self.__connection.row_factory = lite.Row self.__cursor = self.__connection.cursor() with self.__connection: self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'config' ''' + ''' ('property' TEXT, 'value' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors' ''' + ''' ('uid' TEXT NOT NULL, 'last_name' TEXT NOT NULL,''' + ''' 'first_name' TEXT NOT NULL,''' + ''' 'email' TEXT NOT NULL, 'real_email' TEXT DEFAULT '')''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'authors_ignore_list' ''' + ''' ('uid' TEXT)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'copyrights' ''' + ''' ('lid' TEXT, 'from' TEXT, 'to' TEXT,''' + ''' 'on_modifications_txt', 'text' BLOB)''') self.__cursor.execute( '''CREATE TABLE IF NOT EXISTS 'files' ''' + ''' ('filename' TEXT, 'last_modif' TEXT, 'property' TEXT,''' + ''' 'value' TEXT,''' + ''' PRIMARY KEY('filename'))''') self.__cursor.execute('''SELECT * FROM 'config' AS c''') properties = self.__cursor.fetchall() for prop in properties: _property = prop['property'] self._configs[_property] = prop['value'] @property def authors(self): with self.__connection: self.__cursor.execute( '''SELECT a.'uid', a.'email', a.'last_name',''' + ''' a.'first_name', a.'real_email' ''' + ''' FROM 'authors' AS a''') authors = self.__cursor.fetchall() return [adb.Author( *[str(e) if e is not None else e for e in author]) for author in authors] def is_author_in_ignore_list(self, author): with self.__connection: self.__cursor.execute( '''SELECT a.'uid' ''' + ''' FROM 'authors_ignore_list' AS a''' + ''' WHERE a.'uid' = :uid''', {"uid": author.user_name}) authors = self.__cursor.fetchall() return len(authors) > 0 # noinspection PyTypeChecker def get_list_of_ignore_emails(self): with self.__connection: self.__cursor.execute( '''SELECT a.'email' ''' + ''' FROM 
'authors_ignore_list' AS i, 'authors' AS a''' + ''' WHERE a.'uid' = i.'uid' ''') authors = self.__cursor.fetchall() return [str(author['email']) for author in authors] def add_author(self, author): with self.__connection: self.__cursor.execute( '''INSERT INTO 'authors' ('uid', 'first_name',''' + ''' 'last_name', 'email')''' + '''VALUES (:uid, :first_name, :last_name, :email)''', - {'uid': str(author.user_name), - 'last_name': str(author.last_name), - 'first_name': str(author.first_name), - 'email': str(author.e_mail)}) + {'uid': str(author.user_name) + if str(author.user_name) != 'None' else None, + 'last_name': str(author.last_name) + if str(author.last_name) != 'None' else None, + 'first_name': str(author.first_name) + if str(author.first_name) != 'None' else None, + 'email': str(author.e_mail) + if str(author.e_mail) != 'None' else None}) # noinspection PyTypeChecker @property def copyrights(self): with self.__connection: if self.version_config == 1: self.__cursor.execute( '''SELECT c.'lid', c.'on_modifications_txt',''' + ''' c.'from', c.'to', c.'text' ''' + ''' FROM 'copyrights' AS c''') copyrights = self.__cursor.fetchall() return [ CopyRight(copy['lid'], str(copy['on_modifications_txt']), dt.strptime(copy['from'], "%Y-%m-%d").date(), dt.strptime(copy['to'], "%Y-%m-%d").date(), (str(copy['text']) if not type(copy['text']) == bytes else copy['text'].decode('utf-8'))) for copy in copyrights] self.__cursor.execute( '''SELECT c.'lid', c.'license', c.'on_modifications_txt',''' + ''' c.'from', c.'to', c.'begin', c.'need_header', c.'text' ''' + ''' FROM 'copyrights' AS c''') copyrights = self.__cursor.fetchall() _copyrights = [] _default_copyright = '' if 'default_copyright' in self._configs: _default_copyright = self._configs['default_copyright'] if type(_default_copyright) == bytes: _default_copyright = _default_copyright.decode('utf-8') for _copyright in copyrights: _copyright_txt = _default_copyright if _copyright['text'] != '': _copyright_txt = 
_copyright['text'] _copyrights.append( CopyRight2( _copyright['lid'], _copyright['license'], str(_copyright['on_modifications_txt']), dt.strptime(_copyright['from'], "%Y-%m-%d").date(), dt.strptime(_copyright['to'], "%Y-%m-%d").date(), _copyright_txt.format(begin=_copyright['begin'], end=dt.today().year), bool(int(_copyright['need_header'])), (self._configs['license_header'] if 'license_header' in self._configs else ''))) return _copyrights # noinspection PyTypeChecker def find_file(self, filename): return self._get_file_property(filename, 'license_id') def _update_file_property(self, filename, property_name, value, update_time): with self.__connection: self.__cursor.execute( '''INSERT OR REPLACE INTO 'files' ('filename',''' + ''' 'last_modif', 'property', 'value') ''' + ''' VALUES (:filename, :update, :property, :value)''', {'filename': filename, 'property': property_name, 'value': value, 'update': update_time.strftime("%Y-%m-%d")}) # noinspection PyTypeChecker def _get_file_property(self, filename, property_name): with self.__connection: self.__cursor.execute( '''SELECT f.'last_modif', f.'value' ''' + ''' FROM 'files' AS f''' + ''' WHERE f.'filename'=:filename AND f.'property'=:property ''', {'filename': filename, 'property': property_name}) res = self.__cursor.fetchone() if res is not None: return [dt.strptime(res['last_modif'], "%Y-%m-%d"), res['value']] def update_license_file(self, filename, license_id, update=dt.now().date()): self._update_file_property(filename, 'license_id', license_id, update) def update_brief_file(self, filename, brief, update=dt.now().date()): self._update_file_property(filename, 'brief', brief, update) def update_keep_authors_file(self, filename, keep_authors, update=dt.now().date()): self._update_file_property( filename, 'authors_keep', ';'.join([auth.user_name for auth in keep_authors]), update) def get_last_brief(self, filename): brief = self._get_file_property(filename, 'brief') if brief is not None: return brief[1] def 
get_last_keep_authors(self, filename, author_db): res = self._get_file_property(filename, 'authors_keep') if res is not None: authors = res[1].split(";") keep_authors = [] for auth in authors: if len(auth) != 0: a = author_db.find_by_user_name(auth) keep_authors.append(a) return keep_authors @property def version_config(self): if 'version' in self._configs: return int(self._configs['version']) return 1 class LicenserCSVDB(LicenserDB): __author_file = '' __copyright_file = '' __treated_files = dict() # noinspection PyUnusedLocal def __init__(self, filename, **kwargs): self.__config_file = filename with open(self.__config_file) as fh: split_lines = [line.strip().split(';') for line in fh] if len(split_lines) > 1: raise (Exception("Too many lines in the configuration file")) self._configs['project'], self.__author_file, self.__copyright_file, self._configs['versioning_backend'] = [ token.strip('"') for token in split_lines[0] ] with open('treated_files.list', 'r') as file_: for line in file_: f, date, lid = line.split(";") f = f.strip() date = dt.strptime(date, "%Y-%m-%d") self.__treated_files[f] = [date, lid.strip()] @property def authors(self): with open(self.__author_file) as fh: next(fh) split_lines = [line.strip().split(';') for line in fh] return [adb.Author.make((token.strip('"') for token in line)) for line in split_lines] def add_author(self, author): with open(self.__author_file, 'a') as fh: fh.write( ';'.join(['\"{0}\"'.format(val) for val in author]) + '\n') def find_file(self, filename): if filename in self.__treated_files: return self.__treated_files[filename] return None def update_license_file(self, filename, license_id, update=dt.now().date()): self.__treated_files[filename] = [ update.strftime("%Y-%m-%d"), license_id] with open('treated_files.list', 'w') as f: f.write("\n".join([";".join(f) for f in self.__treated_files.items])) @property def copyrights(self): with open(self.__copyright_file) as fh: split_lines = [line.strip().split(';') for line in 
fh] tab = ([[token.strip('"') for token in line] for line in split_lines]) return [CopyRight._make( [lid, on_modif, dt.strptime(start, "%d-%m-%Y").date(), dt.strptime(end, "%d-%m-%Y").date(), "".join([line for line in open(license_file)])]) for lid, on_modif, start, end, license_file in tab] diff --git a/pylicenser/vcs/git.py b/pylicenser/vcs/git.py index 1e27bc0..85101a8 100644 --- a/pylicenser/vcs/git.py +++ b/pylicenser/vcs/git.py @@ -1,161 +1,162 @@ """ git.py: Backend for GIT""" __author__ = "Guillaume Anciaux and Nicolas Richart" __credits__ = [ "Guillaume Anciaux ", "Nicolas Richart ", ] __copyright__ = "Copyright (©) 2010-2021 EPFL (Ecole Polytechnique Fédérale" \ " de Lausanne) Laboratory (LSMS - Laboratoire de Simulation" \ " en Mécanique des Solides)" __license__ = "GPLv3" __version__ = "2.0" import git import binascii import datetime as dt import re from .. import export from .. import version_info @export class GITInfo(version_info.VersionInfo): """ git implementation of the version info class """ __repo = None __other_names = [] def __init__(self, repo, filename, ignore_list, rev_to=None, **kwargs): super().__init__(repo, filename, ignore_list, **kwargs) self.__repo = git.Repo(self._repo) self._name = filename.replace(self._repo, '') commits = self._list_commits(rev_to=rev_to, ignore_list=ignore_list) self._modification_dates = [] for c in commits: if c.author.email not in ignore_list: - self._modification_dates.append(dt.datetime.fromtimestamp(c.authored_date)) + self._modification_dates.append( + dt.datetime.fromtimestamp(c.authored_date)) self.__other_names = self._list_names(commits) # noinspection PyIncorrectDocstring def populate(self, author_db, rev_from=None, rev_to=None): """ populate the internal variables """ _commits = self._list_commits(rev_to=rev_to, rev_from=rev_from) for c in _commits: email = c.author.email a = author_db.find_by_email(email) auth = a.user_name if auth not in self._author_list: self._author_list[auth] = [] if not 
author_db.is_author_in_ignore_list(a): self._authors.add(a) d = dt.datetime.fromtimestamp(c.authored_date) rev = c.hexsha msg = c.message self._author_list[auth].append((d, rev, msg)) def get_modifications(self, revision): hexsha = binascii.unhexlify(revision) commit = git.objects.commit.Commit(self.__repo, hexsha) stats = commit.stats.total patches = [] subproject_meta = re.compile( r'[-+]Subproject commit [0-9a-z]+$') header_modif = re.compile( r'''[-+] * ((@|__)author(__)?|brief|__license__|''' + - r'''__credits__|@date|(C|__c)opyright\s*=?\s*\(©\)) ''') + r'''__credits__|@date|(C|__c)opyright\s+\(©\)) ''') if len(commit.parents) == 2: #merge commit print('Skipping merge commit') return '\n'.join(patches), stats for parent in commit.parents: diffs = commit.diff( parent, create_patch=True, ignore_space_change=True, paths=[self.__other_names]) patch = [ d.diff.decode('utf-8', errors="surrogateescape") for d in diffs ] if len(patch) > 0 and not re.match( subproject_meta, str(patch)) and not re.match(header_modif, str(patch)): patches.extend(patch) return '\n'.join(patches), stats def _list_commits(self, rev_to=None, rev_from=None, ignore_list=None): if ignore_list is None: ignore_list = [] _args = {'follow': True, 'all': True, 'pretty': 'tformat:%H'} if rev_from is not None: _args['since'] = rev_from.strftime("%Y-%m-%d") if rev_to is not None: _args['until'] = rev_to.strftime("%Y-%m-%d") # git log can follow better renames in case there where badly done # (add/rm instead of mv) git_cmd = git.cmd.Git(working_dir=self.__repo.working_dir) str_c = git_cmd.log(self._name, **_args) if len(str_c) != 0: binhashes = (binascii.unhexlify(c) for c in str_c.split('\n')) else: binhashes = [] list_commits = [] for b in binhashes: c = git.objects.commit.Commit(self.__repo, b) if c.author.email not in ignore_list: list_commits.append(c) if len(list_commits) == 0: del _args['follow'] del _args['pretty'] return list(self.__repo.iter_commits(paths=self._name, **_args)) # list_commits 
= list(self.__repo.iter_commits(paths=self._name, **_args)) return list_commits def _list_names(self, commits): """ Finds all the names of a given file """ names = [self._name] for c in commits: for p in c.parents: diffs = c.diff(p, R=True, M=True) for d in diffs.iter_change_type('R'): if d.renamed and d.rename_to in names: new_path = d.rename_from self._names[new_path] = c.hexsha names.append(new_path) if len(self._names.keys()) == 0: c = commits[-1] self._names[self._name] = c.hexsha names.reverse() return names @property def oldest_name(self): return self.__other_names[0]