Page MenuHomec4science

latex_structure.py
No OneTemporary

File Metadata

Created
Sat, Apr 27, 10:02

latex_structure.py

#!/usr/bin/python
################################################################
from __future__ import print_function
################################################################
import re
import types
import pyparsing as pp
################################################################
class LatexEnvironment(object):
    """Token container produced when the environment grammar matches.

    The wrapped tokens may be plain strings or other wrapper objects; each one
    is converted with ``str()`` when the environment is rendered.
    """

    def __init__(self, toks):
        """Store *toks* and print a debug trace of the new environment."""
        self.toks = toks
        print('env:', self)

    def __str__(self):
        """Return the concatenation of the string form of every token."""
        return ''.join(map(str, self.toks))

    def __getitem__(self, index):
        """Give positional access to the wrapped tokens."""
        return self.toks[index]
class LatexCommand(object):
    """Token container produced when the command grammar matches.

    The tokens are joined directly when rendering, so every token has to be a
    string.
    """

    def __init__(self, toks):
        """Store *toks* and print a debug trace of the raw tokens."""
        self.toks = toks
        print('command:', toks)

    def __str__(self):
        """Return the tokens glued together without any separator."""
        pieces = self.toks
        return ''.join(pieces)

    def __getitem__(self, index):
        """Give positional access to the wrapped tokens."""
        return self.toks[index]
class LatexBlock(object):
    """Token container produced when the brace-delimited block grammar matches."""

    def __init__(self, toks):
        """Store *toks* and print a debug trace of the new block."""
        self.toks = toks
        print('block:', self)

    def __str__(self):
        """Return the concatenation of the string form of every token."""
        return ''.join(map(str, self.toks))
class LatexMain(object):
    """Top-level container holding the tokens of a whole parsed document."""

    def __init__(self, toks):
        """Store *toks* without any further processing."""
        self.toks = toks

    def __str__(self):
        """Return the concatenation of the string form of every token."""
        return ''.join(map(str, self.toks))
################################################################
class LatexStructure:
    """Pyparsing-based reader that turns a LaTeX source into token objects.

    The grammar pieces (``text``, ``comment``, ``block``, ``environment``)
    are built lazily and cached on the instance on first access.
    ``parseLatexFile`` stores its result in ``self._content`` and ``str()``
    of the structure is the string form of that result.

    ``buildLatexBlocks``, ``pathInBlock`` and ``getBlocksFromType`` work on a
    separate ``self.main_block`` tree whose nodes expose ``name`` and
    ``content`` attributes.
    """

    def __init__(self):
        """Create an empty structure; nothing is parsed yet."""
        self._content = None

    def __str__(self):
        """Return the string form of the parsed content (``'None'`` if unparsed)."""
        return str(self._content)

    def getBlocksFromType(self, typ):
        """Return the blocks of the ``main_block`` tree whose ``name`` is *typ*.

        The tree is walked with ``pathInBlock``; blocks are returned in the
        order in which they are entered.
        """
        matches = []

        def collect(block):
            if block.name == typ:
                matches.append(block)

        # The keyword must be ``begin_functor``: the former ``fbegin`` does not
        # exist in pathInBlock's signature and raised TypeError.
        self.pathInBlock(begin_functor=collect)
        return matches

    @staticmethod
    def ppValidCharacters():
        """Return the characters accepted as plain text.

        These are pyparsing's printable characters without ``%``, ``{``,
        ``}`` and the backslash, plus space, tab, carriage return and newline.
        """
        valid_characters = pp.printables
        for forbidden in '%{}\\':
            valid_characters = valid_characters.replace(forbidden, '')
        return valid_characters + ' \t\r\n'

    @property
    def text(self):
        """Grammar for plain text or one of the escapes ``\\\\``, ``\\&``, ``\\%``."""
        if '_text' not in self.__dict__:
            plain = pp.Word(self.ppValidCharacters())
            # Whitespace is part of the text and must not be skipped.
            plain.skipWhitespace = False
            self._text = (plain |
                          pp.Literal('\\\\') |
                          pp.Literal(r'\&') |
                          pp.Literal(r'\%'))
        return self._text

    @property
    def comment(self):
        """Grammar for a ``%`` comment running up to the end of the line."""
        if '_comment' not in self.__dict__:
            self._comment = pp.Literal('%') + pp.SkipTo(pp.LineEnd())
            self._comment.skipWhitespace = False
        return self._comment

    @property
    def block(self):
        """Grammar for a ``{ ... }`` group; matches become ``LatexBlock`` objects."""
        if '_block' not in self.__dict__:
            _start = pp.Literal('{')
            _end = pp.Literal('}')
            _content = pp.Forward()
            _block = _start + _content + _end
            # Cache before building the content: the content refers back to
            # ``self.block`` and would otherwise recurse without end.
            self._block = _block
            _content << pp.ZeroOrMore(self.environment |
                                      self.ppCommand() |
                                      self.block |
                                      self.text |
                                      self.comment)
            _content.skipWhitespace = False
            self._block.skipWhitespace = False

            def createBlock(toks):
                return LatexBlock(toks)

            _block.addParseAction(createBlock)
        return self._block

    @staticmethod
    def ppCommand(name=None, n_options=None):
        """Build a fresh grammar for a LaTeX command.

        Args:
            name: command name without the backslash. When ``None`` any
                backslash followed by alphanumerics or ``@`` is accepted.
            n_options: accepted for interface compatibility; not used.

        Returns:
            A pyparsing expression matching the command followed by any
            number of ``[...]`` options and ``{...}`` parameters. Matches are
            wrapped in ``LatexCommand``.
        """
        if name is None:
            _command = pp.Literal('\\') + pp.Word(pp.alphanums + '@')
        else:
            _command = pp.Literal('\\' + name)

        option = (
            pp.Literal('[') +
            pp.delimitedList(pp.Word(pp.alphanums), combine=True) +
            pp.Literal(']'))

        valid_param_character = pp.printables + ' \t\r\n\\'
        for brace in '{}':
            valid_param_character = valid_param_character.replace(brace, '')
        parameters = (
            pp.Literal('{') +
            pp.delimitedList(pp.Word(valid_param_character), combine=True) +
            pp.Literal('}'))

        _command += pp.ZeroOrMore(option | parameters)
        _command.skipWhitespace = False

        def createCommand(toks):
            return LatexCommand(toks)

        _command.addParseAction(createCommand)
        return _command

    @property
    def environment(self):
        """Grammar for ``\\begin{name} ... \\end{name}``.

        Matches become ``LatexEnvironment`` objects. The grammar of the
        environment body is (re)defined each time a ``\\begin`` is matched,
        because it depends on the environment name:

        * ``python`` environments are taken verbatim up to ``\\end{python}``;
        * any other environment accepts nested environments, commands,
          blocks, text and comments, except the command that closes it.
        """
        if '_environment' in self.__dict__:
            return self._environment

        _env_start = self.ppCommand('begin', n_options=1)
        _env_end = self.ppCommand('end')
        _env_content = pp.Forward()
        _env = _env_start + _env_content + _env_end
        self._environment = _env
        self._environment.skipWhitespace = False

        def set_excluding_command(toks):
            # Tokens of the start command are: '\begin', '{', name, '}'.
            env_name = toks[0][2]
            _command_excluding = self.ppCommand()

            if env_name == 'python':
                python_block = pp.SkipTo(pp.Literal(r'\end{python}'))
                python_block.skipWhitespace = False
                _env_content << python_block
            else:
                _env_content << pp.ZeroOrMore(
                    _env |
                    _command_excluding |
                    self.block |
                    self.text |
                    self.comment)

            def reject_own_end(instring, loc, toks):
                # Tokens of a generic command are: '\', name, '{', arg, '}'.
                if toks[0][1] != 'end':
                    return toks
                if toks[0][3] == env_name:
                    # Make the generic command fail on the closing \end so
                    # that the body stops there and ``_env_end`` can match.
                    # This replaces an out-of-range token index that only
                    # failed the match as a side effect of IndexError.
                    raise pp.ParseException(
                        instring, loc,
                        'closing \\end{%s} reached' % env_name)
                return toks

            _command_excluding.addParseAction(reject_own_end)

        _env_start.addParseAction(set_excluding_command)

        def createEnvironment(toks):
            e = LatexEnvironment(toks)
            print('env:', e)
            return e

        _env.addParseAction(createEnvironment)
        return self._environment

    def parseLatexFile(self, filename):
        """Parse the file *filename* and store the result in ``self._content``.

        The whole file is read into memory and parsed as a sequence of
        environments, blocks, commands, text and comments; the resulting
        tokens are wrapped in ``LatexMain``.
        """
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as fin:
            inp = fin.read()

        _content = pp.ZeroOrMore(self.environment |
                                 self.block |
                                 self.ppCommand() |
                                 self.text |
                                 self.comment)
        _content.skipWhitespace = False
        self._content = LatexMain(_content.parseString(inp))

    def buildLatexBlocks(self, filename, herited_types=None):
        """Build the ``main_block`` tree from *filename* with regex splitting.

        The source is split on LaTeX commands; ``\\begin{...}`` opens a
        sub-block, ``\\end{...}`` closes the current one and everything else
        is appended to the current block.

        NOTE(review): this method calls ``LatexBlock()`` without arguments
        and uses ``createSubBlock``, ``endSubBlock``, ``appendContent`` and
        ``name``, none of which the ``LatexBlock`` class in this file
        provides. It presumably targets an earlier block class -- confirm
        before relying on it.

        Args:
            filename: path of the LaTeX file to read.
            herited_types: mapping forwarded to ``createSubBlock``; a new
                empty dict is used when omitted.

        Raises:
            Exception: if a block is still open at the end of the file.
        """
        if herited_types is None:
            # A fresh dict per call avoids sharing one default between calls.
            herited_types = {}

        with open(filename, 'r') as fin:
            inp = fin.read()

        latex_cmd_expr = r'(\\\w+(?:\[\w*\])*(?:{[\w|,|\.|(|)]*?})+)'
        splitted = re.split(latex_cmd_expr, inp)

        self.main_block = LatexBlock()
        self.current_block = self.main_block

        for i in splitted:
            m = re.match(r'\\begin{(.*?)}(.*)', i)
            if m:
                name = m.group(1)
                options = m.group(2)
                self.current_block = self.current_block.createSubBlock(
                    name, options, herited_types)
                continue

            m = re.match(r'\\end{(.*?)}', i)
            if m:
                name = m.group(1)
                try:
                    self.current_block = self.current_block.endSubBlock(name)
                except Exception as e:
                    print("AAAAAAAAAAAAAAAAAA")
                    print(e)
                continue

            self.current_block.appendContent(i)

        if not self.current_block == self.main_block:
            raise Exception(
                "one latex block was not closed: {0}".format(
                    self.current_block.name))

    def pathInBlock(self,
                    block=None,
                    begin_functor=None,
                    end_functor=None,
                    text_functor=None):
        """Walk a block tree depth-first, calling the given functors.

        Args:
            block: root of the walk; defaults to ``self.main_block``.
            begin_functor: called as ``f(block)`` when a block is entered.
            end_functor: called as ``f(block)`` when a block is left.
            text_functor: called as ``f(block, item)`` for every content item
                that is not a sub-block; its return value replaces the item.

        Errors raised while walking a sub-block are printed and the walk
        continues with the next content item.
        """
        if block is None:
            block = self.main_block

        if begin_functor is not None:
            begin_functor(block)

        for i, c in enumerate(block.content):
            # A sub-block is anything that carries its own ``content``. The
            # former ``types.InstanceType`` test exists only on Python 2.
            if hasattr(c, 'content'):
                try:
                    self.pathInBlock(c,
                                     begin_functor,
                                     end_functor,
                                     text_functor)
                except Exception as e:
                    print(e)
            elif text_functor is not None:
                block.content[i] = text_functor(block, c)

        if end_functor is not None:
            end_functor(block)
################################################################
# Command-line entry point: <script> INPUT OUTPUT
# Parses INPUT and writes the string form of the result to OUTPUT + '-2'.
if __name__ == '__main__':
    import sys

    filename_in = sys.argv[1]
    filename_out = sys.argv[2]

    tex_struct = LatexStructure()
    tex_struct2 = LatexStructure()
    # tex_struct is deliberately left unparsed (the regex-based builder is
    # disabled), so OUTPUT receives the string form of an empty structure.
    tex_struct2.parseLatexFile(filename_in)

    # Context managers make sure both outputs are flushed and closed.
    with open(filename_out, 'w') as fout:
        fout.write(str(tex_struct))
    with open(filename_out + '-2', 'w') as fout:
        fout.write(str(tex_struct2))

Event Timeline