diff --git a/PythonLatex/latex_structure.py b/PythonLatex/latex_structure.py index d3631fd..516d984 100644 --- a/PythonLatex/latex_structure.py +++ b/PythonLatex/latex_structure.py @@ -1,357 +1,358 @@ #!/ usr / bin / python ################################################################ from __future__ import print_function ################################################################ import re import types import pyparsing as pp ################################################################ class LatexEnvironment(object): def __init__(self, toks): self.toks = toks self.content = self.toks[1:-1] self.head = self.toks[0] self.tail = self.toks[-1] self.name = self.head.toks[3] self.hide = False try: self.option = self.head.toks[6] except Exception as e: self.option = None # print('env: {0}:{1}:{2}'.format(self.name, self.option, self.content)) def __str__(self): if self.hide: return '' return ''.join([str(self.head)] + [str(i) for i in self.content] + [str(self.tail)]) def __getitem__(self, index): return self.toks[index] class LatexCommand(object): def __init__(self, toks): self.name = toks[1] self.toks = toks # def __str__(self):
        return ''.join(self.toks)

    def __getitem__(self, index):
        """Return the element of ``self.toks`` found at ``index``."""
        return self.toks[index]


class LatexBlock(object):
    """Group of parsed tokens with an optional name.

    ``toks`` is kept as given; ``name`` defaults to ``None``.
    """

    def __init__(self, toks, name=None):
        self.name = name
        self.toks = toks

    def __str__(self):
        # The textual form is the plain concatenation of every token.
        return ''.join(map(str, self.toks))


class LatexMain(object):
    """Top-level container of parsed tokens; its ``name`` is always "main"."""

    def __init__(self, toks):
        self.name = "main"
        self.toks = toks
        # ``content`` refers to the very same object as ``toks`` (no copy).
        self.content = toks

    def __str__(self):
        pieces = []
        for tok in self.toks:
            pieces.append(str(tok))
        return ''.join(pieces)

    def __getitem__(self, index):
        tokens = self.toks
        return tokens[index]


################################################################


class LatexStructure:
    def getBlocksFromType(self, typ):
        mylist = []

        def foo(b):
            if b.name == typ:
                mylist.append(b)

        self.pathInBlock(fbegin=foo)
        return mylist

    @staticmethod
    def ppValidCharacters():
        """Return the characters allowed inside a plain text token.

        The set is ``pp.printables`` without percent, both curly braces and
        backslash, followed by space, tab, carriage return and newline.
        """
        allowed = pp.printables
        for excluded in ('%', '{', '}', '\\'):
            allowed = allowed.replace(excluded, '')
        return allowed + ' \t\r\n'

    @property
    def text(self):
        r"""Parser for a piece of plain text, built once and then cached.

        It accepts either a run of the characters returned by
        :meth:`ppValidCharacters` or one of the escaped sequences
        ``\\``, ``\&``, ``\%``, ``\#`` and ``\_``.

        :return: the pyparsing expression stored in ``self._text``.
        """
        if '_text' not in self.__dict__:
            plain = pp.Word(self.ppValidCharacters())
            self._text = (plain | pp.Literal('\\\\') |
                          pp.Literal(r'\&') | pp.Literal(r'\%') |
                          pp.Literal(r'\#') |
                          pp.Literal(r'\_'))
            self._text.leaveWhitespace()
        # Returned outside the ``if`` so that cached accesses get a value too.
        return self._text

    @property
    def comment(self):
        """Parser for a ``%`` followed by everything up to the end of line.

        The expression is created on first access and stored in
        ``self._comment``; later accesses return the stored object.
        """
        if '_comment' in self.__dict__:
            return self._comment
        self._comment = pp.Literal('%') + pp.SkipTo(pp.LineEnd())
        return self._comment

    @property
    def block(self):
        """Parser for a brace-delimited group ``{ ... }``.

        The expression is built on first access and stored in ``self._block``.
        A successful match is turned into a :class:`LatexBlock` by a parse
        action.
        """
        if '_block' not in self.__dict__:
            _start = pp.Literal('{')
            _end = pp.Literal('}')
            # Forward placeholder: the content grammar refers back to
            # ``self.block`` and therefore has to be defined afterwards.
            _content = pp.Forward().leaveWhitespace()
            _block = _start + _content + _end
            # Stored before the content is defined so that the ``self.block``
            # access below finds '_block' in ``__dict__`` and does not enter
            # this branch again.
            self._block = _block
            _content << pp.ZeroOrMore(self.environment() | self.ppCommand() |
                                      self.block | self.text | self.comment)
            self._block.leaveWhitespace()

            def createBlock(toks):
                # Wrap the matched tokens into a LatexBlock (no name given).
                b = LatexBlock(toks)
                return b

            _block.addParseAction(createBlock)
        return self._block.leaveWhitespace()

    @staticmethod
    def ppCommand(name=None):
        r"""Build a parser for a LaTeX command such as ``\name[opt]{param}``.

        The grammar is a backslash, the command name, then any number of
        ``[...]`` option groups and ``{...}`` parameter groups.  A successful
        match is wrapped in a :class:`LatexCommand` by a parse action.

        :param name: exact command name to match; when ``None`` any word made
            of alphanumeric characters and ``@`` is accepted.
        :return: a newly built pyparsing expression (nothing is cached).
        """
        _command = pp.Literal('\\').leaveWhitespace()
        if name is None:
            _command += pp.Word(pp.alphanums + '@').leaveWhitespace()
        else:
            _command += pp.Literal(name).leaveWhitespace()

        option = (
            pp.Literal('[') +
            pp.delimitedList(pp.Word(pp.alphanums), combine=True) +
            pp.Literal(']')).leaveWhitespace()

        # Parameters may hold any printable character, whitespace or
        # backslash, except the braces that delimit them.
        valid_param_character = pp.printables + ' \t\r\n\\'
        valid_param_character = valid_param_character.replace('{', '')
        valid_param_character = valid_param_character.replace('}', '')
        param_name = pp.delimitedList(pp.Word(valid_param_character),
                                      combine=True)
        parameters = (
            pp.Literal('{') + param_name +
            pp.Literal('}').leaveWhitespace()).leaveWhitespace()

        _command += pp.ZeroOrMore(option | parameters).leaveWhitespace()

        def createCommand(toks):
            c = LatexCommand(toks)
            return c

        # These were fused on a single line, which is not valid Python.
        _command.addParseAction(createCommand)
        return _command.leaveWhitespace()

    def environment(self):
        r"""Build a fresh parser for a ``\begin{name} ... \end{name}`` environment.

        The grammar of the content is only chosen once the opening command
        has been parsed (parse action attached to the ``\begin`` parser):

        * for the ``python`` environment the content is skipped verbatim up
          to ``\end{python}``;
        * for any other name the content is a sequence of nested
          environments, commands, blocks, text and comments.

        A successful match is wrapped in a :class:`LatexEnvironment`.

        :return: a newly built pyparsing expression (nothing is cached).
        """
        _env_start = self.ppCommand('begin')
        _env_end = self.ppCommand('end')
        _env_content = pp.Forward().leaveWhitespace()
        _environment = _env_start + _env_content
        _environment += _env_end

        def set_excluding_command(toks):
            # Token 3 of the ``\begin{name}`` command is the environment name.
            env_name = toks[0][3]
            _command_excluding = self.ppCommand()
            if env_name == 'python':
                python_block = pp.SkipTo(pp.Literal(r'\end{python}'))
                _env_content << python_block
            else:
                _env_content << pp.ZeroOrMore(
                    self.environment().leaveWhitespace() |
                    _command_excluding | self.block | self.text |
                    self.comment)

            def check(s, loc, toks):
                if toks[0][1] != 'end':
                    return toks
                if toks[0][3] == env_name:
                    # The ``\end`` closing the current environment must not
                    # be consumed as ordinary content: fail this alternative
                    # explicitly so the content stops here and ``_env_end``
                    # can match.  This replaces an out-of-range index that
                    # triggered the failure through an IndexError.
                    raise pp.ParseException(
                        s, loc,
                        'closing command of environment ' + str(env_name))
                return toks

            _command_excluding.addParseAction(check)

        _env_start.addParseAction(set_excluding_command)

        def createEnvironment(toks):
            e = LatexEnvironment(toks)
            return e

        _environment.addParseAction(createEnvironment)
        return _environment.leaveWhitespace()

    def parseLatexFile(self, filename):
        fin = open(filename, 'r')
        inp = fin.read()
        fin.close()
        self.parseLatex(inp)

    def parseLatex(self, latex_code):
        """Parse ``latex_code`` and store the result in ``self._content``.

        The input is matched as a sequence of environments, blocks, commands,
        text and comments; the resulting tokens are wrapped in a
        :class:`LatexMain`.

        :param latex_code: LaTeX source given as a string.
        """
        alternatives = (self.environment() | self.block |
                        self.ppCommand() | self.text | self.comment)
        grammar = pp.ZeroOrMore(alternatives)
        grammar.leaveWhitespace()
        parsed = grammar.parseString(latex_code)
        self._content = LatexMain(parsed)

    def buildLatexBlocks(self, filename, herited_types=dict()):
        fin = open(filename, 'r')
        inp = fin.readlines()
        inp = "".join(inp)

        latex_cmd_expr = r'(\\\w+(?:\[\w*\])*(?:{[\w|,|\.|(|)]*?})+)'
        splitted = re.split(latex_cmd_expr, inp)

        self.main_block = LatexBlock()
        self.current_block = self.main_block

        for i in splitted:
            m = re.match(r'\\begin{(.*?)}(.*)', i)
            if m:
                name = m.group(1)
                options = m.group(2)
                self.current_block = self.current_block.createSubBlock(
                    name, options, herited_types)
                continue

            m = re.match(r'\\end{(.*?)}', i)
            if m:
                name = m.group(1)
                try:
                    self.current_block = self.current_block.endSubBlock(name)
                except Exception as e:
                    print("AAAAAAAAAAAAAAAAAA")
                    print(e)
                continue

            self.current_block.appendContent(i)

        if not self.current_block == self.main_block:
            raise Exception(
                "one latex block was not closed: {0}".format(
                    self.current_block.name))

    def pathInBlockOld(self,
                       block=None,
                       begin_functor=None,
                       end_functor=None,
                       text_functor=None):
        if block is None:
            block = self.main_block

        if begin_functor is not None:
            begin_functor(block)

        for i in range(len(block.content)):
            c = block.content[i]
            if isinstance(c, types.InstanceType):
                try:
                    self.pathInBlock(c, begin_functor, end_functor,
                                     text_functor)
                except Exception as e:
                    print(e)
            else:
                if text_functor is not None:
                    block.content[i] = text_functor(block, c)

        if end_functor is not None:
            end_functor(block)

    def pathInBlock(self,
                    block=None,
                    begin_functor=None,
                    end_functor=None,
                    text_functor=None):
        if text_functor is None:

            def text_functor(b, c):
                return c

        if block is None:
            block = self._content

        if begin_functor is not None:
            begin_functor(block)

        for i, c in enumerate(block.content): if isinstance(c, LatexEnvironment):
                self.pathInBlock(c, begin_functor, end_functor, text_functor)
            else:
                block.content[i] = text_functor(block, str(c))

        if end_functor is not None:
            end_functor(block)

    def __str__(self):
        """Return ``str()`` of the object stored in ``self._content``."""
        return str(self._content)

    @property
    def content(self):
        """Read-only access to the object stored in ``self._content``."""
        return self._content

    def __init__(self):
        """Create a structure whose ``_content`` is ``None``."""
        self._content = None


################################################################

diff --git a/Slides/snippet_helper.py b/Slides/snippet_helper.py
index 36eb1e7..9f765f9 100644
--- a/Slides/snippet_helper.py
+++ b/Slides/snippet_helper.py
@@ -1,329 +1,332 @@
#!/usr/bin/env python3

import subprocess
import os
import re
from pygments import highlight
from pygments.lexers import CppLexer
from pygments.formatters import HtmlFormatter
from pygments.formatters import LatexFormatter
from pygments import token
from IPython.display import HTML
from tempfile import NamedTemporaryFile

################################################################


class Snippet(object):
    class CompilationError(Exception):
        """Raised by ``Snippet.compile`` when the compiler command returns a
        non-zero exit status."""
        pass

    default_output = 'html'
    default_line_numbering = False

    def __init__(self, inp, format_flag=False, compile_flag=False):
        try:
            self.loadfile(inp)
        except Exception as e:
            self.content = inp.strip()

        if format_flag:
            self.clang_format()

        if compile_flag:
            self.compile()

    def save(self, filename):
        f = open(filename, 'w')
        f.write(self.content)
        f.close()

    def loadfile(self, inp):
        filename = inp
        full_filename = os.path.realpath(filename)
        f = open(full_filename)
        self.content = f.read()
        f.close()

    def clang_format(self):
        """Replace the content by the standard output of ``clang-format``.

        The content is written to a temporary file which is passed to the
        ``clang-format`` command; whatever the command prints on its standard
        output, decoded as UTF-8, becomes the new content.

        NOTE(review): the exit status is not checked, so a failing command
        presumably leaves the content empty -- confirm this is intended.
        """
        # The context manager closes the temporary file once the command has
        # finished, even if launching it raises.
        with NamedTemporaryFile() as f:
            f.write(self.content.encode())
            f.flush()

            p = subprocess.Popen(
                'clang-format {0}'.format(f.name),
                shell=True,
                stdout=subprocess.PIPE)
            # stderr is not piped, so only stdout carries data.
            stdout, _ = p.communicate()

        self.content = stdout.decode('utf-8')

    def compile(self, working_dir='/tmp'):
        """Compile the snippet with ``g++ -Wall -Wextra -c``.

        The content is written to a temporary ``.cpp`` file and compiled from
        the directory holding that file.  The temporary source and the
        produced object file are removed afterwards, and the previous working
        directory is restored, whether or not the compilation succeeds.

        :param working_dir: NOTE(review): the value passed in is overwritten
            by the directory of the temporary file before being used, as in
            the original code -- confirm whether it should be honoured.
        :raises Snippet.CompilationError: when ``g++`` returns a non-zero
            status; the message carries the compiler's standard error.
        """
        # Closing the file guarantees the compiler sees the whole content;
        # ``delete=False`` keeps it on disk until it is removed below.
        with NamedTemporaryFile(suffix='.cpp', delete=False) as f:
            f.write(self.content.encode())
        source_file = f.name
        object_file = os.path.splitext(source_file)[0] + '.o'

        working_dir = os.path.dirname(source_file)
        previous_dir = os.getcwd()
        try:
            os.chdir(working_dir)
            p = subprocess.Popen(
                'g++ -Wall -Wextra -I {1} -c {0}'.format(source_file,
                                                         working_dir),
                shell=True,
                stderr=subprocess.PIPE)
            # stdout is not piped, so only stderr carries data.
            _, stderr = p.communicate()
            ret = p.returncode
        finally:
            os.chdir(previous_dir)
            for leftover in (source_file, object_file):
                if os.path.exists(leftover):
                    os.remove(leftover)

        if ret != 0:
            raise Snippet.CompilationError('compilation failed\n' +
                                           stderr.decode('utf-8'))

    def get_lines(self):
        """Return the content split on newline characters, as a list."""
        return self.content.split('\n')

    def get_content(self):
        """Return the snippet content (``self.content``) unchanged."""
        return self.content

    def __str__(self):
        _content = self.content.split('\n')
        ret = ""
        for i, line in enumerate(_content):
            ret += '{0}: {1}\n'.format(i, line)
        return ret


class KeywordLexer(object):
    """Minimal lexer tagging every occurrence of a keyword pattern.

    ``keyword`` is used as a regular expression; the special value
    ``'curly_brackets'`` stands for the pattern ``{|}``.
    """

    def __init__(self, keyword):
        if keyword == 'curly_brackets':
            keyword = '{|}'
        self.keyword = keyword

    def get_tokens(self, text):
        """Split ``text`` into ``(token type, string)`` pairs.

        Pieces matching the keyword pattern get ``token.Keyword.Type`` and
        the pieces in between get ``token.Text``.

        :param text: string to tokenize.
        :return: list of ``(token type, string)`` tuples.
        """
        pattern = '(' + self.keyword + ')'
        # With one capturing group, re.split alternates text and match.
        splits = re.split(pattern, text)
        if len(splits) == 1:
            return [(token.Text, text)]

        res = []
        for count, s in enumerate(splits):
            if count % 2 == 0:
                res.append((token.Text, s))
            else:
                res.append((token.Keyword.Type, s))
        # The return was fused onto the ``append`` line, which is not valid
        # Python; it belongs after the loop.
        return res


class LineLexer(object):
    """Minimal lexer tagging one line of the text, selected by index.

    ``line_number`` is the zero-based index of that line among the pieces
    obtained by splitting the text on newlines.
    """

    def __init__(self, line_number):
        self.line_number = line_number

    def get_tokens(self, text):
        """Return three ``(token type, string)`` pairs: before, line, after.

        The selected line gets ``token.Keyword.Type``; the text before and
        after it gets ``token.Text``.
        """
        lines = text.split('\n')
        index = self.line_number
        before = '\n'.join(lines[:index]) + '\n'
        selected = lines[index] + '\n'
        after = '\n' + '\n'.join(lines[index + 1:]) + '\n'
        return [
            (token.Text, before),
            (token.Keyword.Type, selected),
            (token.Text, after),
        ]


class CustomLatexFormatter(LatexFormatter):
    def __init__(self, **kwargs):
        # Forward every keyword argument unchanged to LatexFormatter.
        LatexFormatter.__init__(self, **kwargs)

    def format(self, tokens, outfile):
        res = []
        for t, s in tokens:
            if HtmlFormatter.format(self, tokens, outfile_final) # return from io import StringIO outfile = StringIO() res = [] ss = [] for t, s in tokens: if t == token.Keyword.Type: ss.append(s) s = 'A' * 24 t = token.Text res.append((t, s)) HtmlFormatter.format(self, res, outfile) final = outfile.getvalue() while ss: s = ss.pop() final = final.replace( 'A' * 24, '{0}'.format( s), 1) outfile_final.write(final) def pigment(self, keyword=None, line_highlight=None, start=None, end=None, output=None, line_numbering=None): _content = self.content.split('\n') _content = [(c + '\n') for c in _content] if start is None: start = 1 if end is None: end = len(_content) if output is None: output = self.default_output lexer = CppLexer() if keyword is not None: lexer = self.KeywordLexer(keyword) elif line_highlight is not None: - lexer = self.LineLexer(line_highlight - 1) + if line_highlight < start or line_highlight > end: + raise RuntimeError('line_highlight out of the sub snippet') + line_highlight -= start + lexer = self.LineLexer(line_highlight) if line_numbering is None: line_numbering = self.default_line_numbering if output == 'html': formatter = HtmlFormatter( full=False, linenos=line_numbering, linenostart=start) elif output == 'latex': formatter = LatexFormatter( full=False, linenos=line_numbering, linenostart=start) if (keyword is not None) or (line_highlight is not None): format_class = self.CustomHtmlFormatter if output == 'latex': format_class = self.CustomLatexFormatter formatter = format_class( full=False, linenos=line_numbering, linenostart=start) snip = _content[start - 1:end] snip = ''.join(snip) colored_snippet = highlight(snip, lexer, formatter) if output == 'html': HTML_TEMPLATE = """ {} """ css = formatter.get_style_defs() colored_snippet = HTML(HTML_TEMPLATE.format(css, colored_snippet)) return colored_snippet @staticmethod def getLatexStyleDefs(): return LatexFormatter(full=True).get_style_defs() 
################################################################ class SnippetCollection(Snippet): def __init__(self): self.functions = dict() self.func_names = [] def addFunction(self, f_name, code): if f_name in self.functions: raise RuntimeError('function already declared') self.func_names.append(f_name) self.functions[f_name] = Snippet(code, format_flag=True) self.compile() def __getitem__(self, key): return self.functions[key] def flatten(self): global_input = """ #include #include #include #include """ + '/' * 70 + '\n\n' for f_name in self.func_names: global_input += "void {0}(){{\n".format(f_name) global_input += self.functions[f_name].content global_input += '\n}\n\n' + '/' * 70 + '\n\n' global_input += """ int main(int argc, char ** argv){\n""" for f_name in self.func_names: global_input += f_name + '();\n' global_input += '\n}\n' flat = Snippet(global_input, format_flag=True) return flat def compile(self, **kwargs): global_snippet = self.flatten() global_snippet.compile(**kwargs) def save(self, filename): global_snippet = self.flatten() open(filename, 'w').write(global_snippet.content) def pigment(self, *args): return self.flatten().pigment(*args) def last(self): return self.functions[self.func_names[-1]] ################################################################ # def printMatlab(code): # # html_snippet = highlight( # code, MatlabLexer(), # HtmlFormatter(full=True, style='colorful', # classprefix='matlab_')) # # display(HTML(html_snippet)) # # return html_snippet # ## testing #Snippet.default_output = 'latex' #Snippet.default_line_numbering = True # #snippet = SnippetCollection() # snippet.addFunction('if_syntax', ''' # int p, q; # if (p > q) { # /* # Statement1; # Statement2; # */ # } #''') # #print (snippet['if_syntax'].pigment()) diff --git a/tests/test_platex.py b/tests/test_platex.py index f7b8a94..b0141c1 100644 --- a/tests/test_platex.py +++ b/tests/test_platex.py @@ -1,288 +1,300 @@ #!/bin/env python3 # -*- coding: utf-8 -*- from 
__future__ import print_function import unittest try: from PythonLatex.latex_structure import LatexStructure except: pass class pLatexTest(unittest.TestCase): "Unit tests for pLatex" def setUp(self, ): pass def test_latex_parser_beamer(self): latex_code = r""" \documentclass[9pt,xcolor=dvipsnames]{beamer} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \usepackage{fancyvrb} \begin{python}{header} from Slides.snippet_helper import Snippet, SnippetCollection Snippet.default_output='latex' # Snippet.default_line_numbering=True \end{python} \py{Snippet.getLatexStyleDefs()} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \input{class.tex} \title{Chapter 4. Pointers} \begin{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \maketitle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile]{Pointers and the Computer's Memory} \only<1>{ \py{yop} } \only<2>{ } $p_x$ ROW\_MAJOR \end{frame} \end{document} """ tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) f = open('tmp.tex', 'w') f.write(latex_code) f.close() f = open('tmp_output.tex', 'w') f.write(output) f.close() if latex_code != output: print(output) self.assertEqual(latex_code, output) def test_latex_parser_underscore(self): latex_code = r'ROW\_MAJOR' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_nested_begin_end(self): latex_code = r''' \begin{toto}tatie \begin{re}tututoto \end{re} aa\end{toto} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_begin_end(self): latex_code = r'\begin{toto}tatie\end{toto}' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_text(self): latex_code = r'tatie' 
tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_comment(self): latex_code = r''' % tatie toto ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_command(self): latex_code = r' \documentclass[10pt]{article}' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_parser_nested_begin_command(self): latex_code = r'''{ yala {toto } \begin{align} tutu \begin{equation} toto \end{equation} \end{align} {\titi{ tutu} } }''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_env(self): latex_code = r''' \begin{toto} tata \end{toto} \begin{tata} toto \end{tata} \begin{tutu} titi \begin{titi} \end{titi} \end{tutu} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) blk_list = [] def foo(i): blk_list.append(i.name) blk_list_correct = ['main', 'toto', 'toto', 'tata', 'tata', 'tutu', 'titi', 'titi', 'tutu', 'main'] tex_struct.pathInBlock(begin_functor=foo, end_functor=foo) self.assertEqual(blk_list, blk_list_correct) def test_latex_python_code(self): latex_code = r''' \begin{python}{cpp} snippet = Snippet('code_snippets/hello.cpp') \end{python} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) def test_latex_frame_includegraphics_only(self): latex_code = r''' \begin{frame}{What is a computer ?} \includegraphics<2>[width=\textwidth]{figures/computer-components.png} \end{frame} \begin{frame}{What is a program ?} toto \end{frame} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) 
blk_list = [] def foo(i): blk_list.append(i.name) tex_struct.pathInBlock(begin_functor=foo, end_functor=foo) blk_list_correct = ['main', 'frame', 'frame', 'frame', 'frame', 'main'] self.assertEqual(blk_list, blk_list_correct) def test_latex_item_beamer(self): latex_code = r''' \begin{document} \begin{frame} \section{Class organization} \begin{itemize} \item<1-> Teaching staff: G. Anciaux, A. Nielsen, L. Pegolotti. \item<2-> Lectures: on Mondays, exercises on Fridays \item<3-> Follow chapters of the book: \href{http://link.springer.com/book/10.1007/978-1-4471-2736-9}{Guide To Scientific Computing in C++} \item<4-> Permanent homework: reading next chapter of the book \item<5-> Moodle (password: PCSC2017): material, forum (at the beginning \item<6-> Git: material, pdfs, solutions \item<7-> Evaluation: project realization and oral presentation \end{itemize} \end{frame} \begin{frame} \end{frame} \end{document} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) blk_list = [] def foo(i): blk_list.append(i.name) tex_struct.pathInBlock(begin_functor=foo, end_functor=foo) blk_list_correct = ['main', 'document', 'frame', 'itemize', 'itemize', 'frame', 'frame', 'frame', 'document', 'main'] self.assertEqual(blk_list, blk_list_correct) def test_latex_python_frame(self): latex_code = r''' \begin{python}{header} b = 100 \end{python} \begin{document} \begin{frame} \end{frame} \begin{python}{cpp} a = 10 \end{python} \begin{frame}[fragile] \py{str(a)} \end{frame} \end{document} ''' tex_struct = LatexStructure() tex_struct.parseLatex(latex_code) output = str(tex_struct) self.assertEqual(latex_code, output) blk_list = [] def foo(i): blk_list.append(i.name) tex_struct.pathInBlock(begin_functor=foo, end_functor=foo) blk_list_correct = ['main', 'python', 'python', 'document', 'frame', 'frame', 'python', 'python', 'frame', 'frame', 'document', 'main'] self.assertEqual(blk_list, blk_list_correct) + + def 
test_latex_python_special_characters(self): + latex_code = r''' +\begin{document} +Toto is \% \\ \& \# \_ +Tata +\end{document} +''' + tex_struct = LatexStructure() + tex_struct.parseLatex(latex_code) + output = str(tex_struct) + self.assertEqual(latex_code, output)