Page MenuHomec4science

latex_structure.py
No OneTemporary

File Metadata

Created
Sat, Apr 27, 04:48

latex_structure.py

#!/usr/bin/python
################################################################
from __future__ import print_function
################################################################
import re
import types
import pyparsing as pp
################################################################
class LatexEnvironment(object):
    """One parsed LaTeX environment: opening command, content, closing command.

    ``toks`` is the token sequence matched for the whole environment.  Its
    first item is the opening command, its last item the closing command and
    everything in between is the content.  The opening command must expose a
    ``toks`` sequence whose item 2 is the environment name.

    Attributes:
        toks: the full token sequence handed to the constructor.
        head: first token (opening command).
        tail: last token (closing command).
        content: tokens strictly between ``head`` and ``tail``.
        name: environment name, read from ``head.toks[2]``.
        option: ``head.toks[5]`` when the opening command has that many
            tokens, otherwise ``None``.
        hide: when true, ``str()`` of the environment is the empty string.
    """

    def __init__(self, toks):
        self.toks = toks
        self.head = toks[0]
        self.tail = toks[-1]
        self.content = toks[1:-1]

        head_toks = self.head.toks
        self.name = head_toks[2]
        # Item 5 only exists when something follows the environment name in
        # the opening command; indexing it unconditionally raised IndexError
        # for a plain opening command, so fall back to None instead.
        self.option = head_toks[5] if len(head_toks) > 5 else None
        self.hide = False

    def __str__(self):
        """Render head, a newline, the content and the tail (or '' if hidden)."""
        if self.hide:
            return ''
        parts = [str(self.head), '\n']
        parts.extend(str(item) for item in self.content)
        parts.append(str(self.tail))
        return ''.join(parts)

    def __getitem__(self, index):
        """Index into the full token sequence (head and tail included)."""
        return self.toks[index]
class LatexCommand(object):
    """A parsed LaTeX command kept as its raw sequence of string tokens.

    ``toks`` is stored untouched; ``str()`` concatenates the tokens and
    indexing is forwarded to the token sequence.
    """

    def __init__(self, toks):
        # No output here: the constructor runs once per parsed command, so
        # printing the tokens would flood stdout during parsing.
        self.toks = toks

    def __str__(self):
        """Return the tokens joined back together with no separator."""
        return ''.join(self.toks)

    def __getitem__(self, index):
        """Return the token at ``index``."""
        return self.toks[index]
class LatexBlock(object):
    """A brace-delimited group of tokens, optionally carrying a name."""

    def __init__(self, toks, name=None):
        self.name = name
        self.toks = toks

    def __str__(self):
        """Concatenate the string form of every token, in order."""
        return ''.join(map(str, self.toks))
class LatexMain(object):
    """Root container for the top-level tokens of a parsed document.

    ``content`` is the very same object as ``toks`` and ``name`` is always
    the string "main".
    """

    def __init__(self, toks):
        self.name = "main"
        self.toks = toks
        self.content = toks

    def __str__(self):
        """Concatenate the string form of every top-level token."""
        pieces = []
        for tok in self.toks:
            pieces.append(str(tok))
        return ''.join(pieces)

    def __getitem__(self, index):
        """Return the top-level token at ``index``."""
        return self.toks[index]
################################################################
class LatexStructure:
    """Parse LaTeX source with pyparsing and walk the resulting structure.

    :meth:`parseLatex` stores the parsed document in ``self._content`` as a
    ``LatexMain``; :meth:`pathInBlock` walks that structure, recursing into
    ``LatexEnvironment`` items.  The grammar pieces (``text``, ``comment``,
    ``block``, ``environment``) are built lazily and cached on the instance.
    """

    def __init__(self):
        self._content = None

    def __str__(self):
        """Return the string form of the parsed content."""
        return str(self._content)

    @property
    def content(self):
        """The parsed content (``None`` until something has been parsed)."""
        return self._content

    def getBlocksFromType(self, typ):
        """Return every block visited by :meth:`pathInBlock` named ``typ``.

        Only blocks handed to the begin callback are considered, i.e. the
        root content and the ``LatexEnvironment`` items reached recursively.
        """
        matches = []

        def collect(b):
            if b.name == typ:
                matches.append(b)

        # pathInBlock's callback parameter is ``begin_functor``.
        self.pathInBlock(begin_functor=collect)
        return matches

    @staticmethod
    def ppValidCharacters():
        """Return the characters accepted as plain text.

        These are pyparsing's printables without ``%``, ``{``, ``}`` and the
        backslash, plus space, tab, carriage return and newline.
        """
        valid_characters = pp.printables
        for special in ('%', '{', '}', '\\'):
            valid_characters = valid_characters.replace(special, '')
        valid_characters += ' \t\r\n'
        return valid_characters

    @property
    def text(self):
        """Grammar for plain text or one of the escapes ``\\\\``, ``\\&``, ``\\%``."""
        if '_text' not in self.__dict__:
            plain = pp.Word(self.ppValidCharacters())
            # Whitespace is part of the text, so it must not be skipped.
            plain.skipWhitespace = False
            self._text = (plain |
                          pp.Literal('\\\\') |
                          pp.Literal(r'\&') |
                          pp.Literal(r'\%'))
        return self._text

    @property
    def comment(self):
        """Grammar for a ``%`` comment running up to the end of the line."""
        if '_comment' not in self.__dict__:
            self._comment = pp.Literal('%')
            self._comment += pp.SkipTo(pp.LineEnd())
            self._comment.skipWhitespace = False
        return self._comment

    @property
    def block(self):
        """Grammar for a ``{ ... }`` group; matches become ``LatexBlock``."""
        if '_block' not in self.__dict__:
            _start = pp.Literal('{')
            _end = pp.Literal('}')
            _content = pp.Forward()
            _block = _start + _content + _end
            # Cache before building the content: the content refers back to
            # ``self.block`` and must get this same object, not recurse.
            self._block = _block
            _content << pp.ZeroOrMore(self.environment |
                                      self.ppCommand() |
                                      self.block |
                                      self.text |
                                      self.comment)
            _content.skipWhitespace = False
            self._block.skipWhitespace = False

            def createBlock(toks):
                return LatexBlock(toks)

            _block.addParseAction(createBlock)
        return self._block

    @staticmethod
    def ppCommand(name=None, n_options=None):
        """Build a fresh grammar for a LaTeX command.

        Args:
            name: when given, only the command ``\\<name>`` is matched (as a
                single literal token); otherwise a backslash followed by a
                word of alphanumerics and ``@`` is matched (two tokens).
            n_options: accepted but not used by this implementation.

        Returns:
            A pyparsing expression matching the command followed by any
            number of ``[...]`` options or ``{...}`` parameters.  Matches
            are turned into ``LatexCommand`` objects.
        """
        if name is None:
            _command = pp.Literal('\\') + pp.Word(pp.alphanums + '@')
        else:
            _command = pp.Literal('\\' + name)

        option = (
            pp.Literal('[') +
            pp.delimitedList(pp.Word(pp.alphanums),
                             combine=True) +
            pp.Literal(']'))

        # A parameter may contain anything printable (backslash included)
        # except the braces that delimit it.
        valid_param_character = pp.printables + ' \t\r\n\\'
        valid_param_character = valid_param_character.replace('{', '')
        valid_param_character = valid_param_character.replace('}', '')
        param_name = pp.delimitedList(pp.Word(valid_param_character),
                                      combine=True)
        param_name.skipWhitespace = False

        parameters = (
            pp.Literal('{') + param_name + pp.Literal('}'))
        parameters.skipWhitespace = False

        _command += pp.ZeroOrMore(option | parameters)
        _command.skipWhitespace = False

        def createCommand(toks):
            return LatexCommand(toks)

        _command.addParseAction(createCommand)
        return _command

    @property
    def environment(self):
        """Grammar for ``\\begin{name} ... \\end{...}``.

        Matches become ``LatexEnvironment`` objects.  The content grammar is
        (re)bound by a parse action each time an opening command is matched:
        a ``python`` environment is consumed verbatim up to
        ``\\end{python}``; any other environment accepts nested
        environments, commands, blocks, text and comments, except the
        ``\\end`` command carrying the environment's own name.
        """
        if '_environment' in self.__dict__:
            return self._environment

        _env_start = self.ppCommand('begin', n_options=1)
        _env_end = self.ppCommand('end')
        _env_content = pp.Forward()
        _env = _env_start + _env_content + _env_end
        self._environment = _env
        self._environment.skipWhitespace = False

        def set_excluding_command(toks):
            # toks[0] is the opening LatexCommand; its item 2 is the name.
            env_name = toks[0][2]
            _command_excluding = self.ppCommand()

            if env_name == 'python':
                python_block = pp.SkipTo(pp.Literal(r'\end{python}'))
                python_block.skipWhitespace = False
                _env_content << python_block
            else:
                _env_content << pp.ZeroOrMore(
                    _env |
                    _command_excluding |
                    self.block |
                    self.text |
                    self.comment)

            def reject_matching_end(s, loc, toks):
                # A generic command is tokenised as
                # ['\\', name, '{', param, '}', ...].
                cmd = toks[0]
                if cmd[1] != 'end':
                    return toks
                if cmd[3] == env_name:
                    # This is the closing command of the current
                    # environment: fail the match explicitly so it is left
                    # for the environment's own end rule, instead of relying
                    # on an out-of-range index to abort the parse action.
                    raise pp.ParseException(
                        s, loc,
                        'closing command of environment ' + str(env_name))
                return toks

            _command_excluding.addParseAction(reject_matching_end)

        _env_start.addParseAction(set_excluding_command)

        def createEnvironment(toks):
            return LatexEnvironment(toks)

        _env.addParseAction(createEnvironment)
        return self._environment

    def parseLatexFile(self, filename):
        """Read ``filename`` and parse its whole content with parseLatex."""
        # The context manager closes the file even if reading fails.
        with open(filename, 'r') as fin:
            inp = fin.read()
        self.parseLatex(inp)

    def parseLatex(self, latex_code):
        """Parse ``latex_code`` and store the result in ``self._content``."""
        _content = pp.ZeroOrMore(self.environment |
                                 self.block |
                                 self.ppCommand() |
                                 self.text |
                                 self.comment)
        _content.skipWhitespace = False
        self._content = LatexMain(_content.parseString(latex_code))

    def buildLatexBlocks(self, filename, herited_types=None):
        """Regex-based block builder (apparently an older code path).

        NOTE(review): this calls ``LatexBlock()`` without arguments and uses
        ``createSubBlock``, ``endSubBlock`` and ``appendContent``, none of
        which exist on the ``LatexBlock`` defined in this file -- confirm
        whether this method is still meant to be usable.

        Args:
            filename: path of the LaTeX file to read.
            herited_types: mapping forwarded to ``createSubBlock``; a fresh
                empty dict is used when omitted.

        Raises:
            Exception: if a block is still open once the input is consumed.
        """
        if herited_types is None:
            # Avoid sharing one default dict between calls.
            herited_types = {}

        with open(filename, 'r') as fin:
            inp = fin.read()

        latex_cmd_expr = r'(\\\w+(?:\[\w*\])*(?:{[\w|,|\.|(|)]*?})+)'
        splitted = re.split(latex_cmd_expr, inp)

        self.main_block = LatexBlock()
        self.current_block = self.main_block

        for i in splitted:
            m = re.match(r'\\begin{(.*?)}(.*)', i)
            if m:
                name = m.group(1)
                options = m.group(2)
                self.current_block = self.current_block.createSubBlock(
                    name, options, herited_types)
                continue

            m = re.match(r'\\end{(.*?)}', i)
            if m:
                name = m.group(1)
                try:
                    self.current_block = self.current_block.endSubBlock(name)
                except Exception as e:
                    print("AAAAAAAAAAAAAAAAAA")
                    print(e)
                continue

            self.current_block.appendContent(i)

        if not self.current_block == self.main_block:
            raise Exception(
                "one latex block was not closed: {0}".format(
                    self.current_block.name))

    def pathInBlockOld(self,
                       block=None,
                       begin_functor=None,
                       end_functor=None,
                       text_functor=None):
        """Older traversal working on ``self.main_block``.

        NOTE(review): ``types.InstanceType`` only exists on Python 2, and the
        recursion goes through :meth:`pathInBlock`, not this method --
        confirm whether this is still needed.
        """
        if block is None:
            block = self.main_block

        if begin_functor is not None:
            begin_functor(block)

        for i, c in enumerate(block.content):
            if isinstance(c, types.InstanceType):
                try:
                    self.pathInBlock(c,
                                     begin_functor,
                                     end_functor,
                                     text_functor)
                except Exception as e:
                    print(e)
            else:
                if text_functor is not None:
                    block.content[i] = text_functor(block, c)

        if end_functor is not None:
            end_functor(block)

    def pathInBlock(self,
                    block=None,
                    begin_functor=None,
                    end_functor=None,
                    text_functor=None):
        """Walk ``block`` depth-first, calling the given callbacks.

        Args:
            block: block to walk; defaults to the parsed content.
            begin_functor: called with each block before its content.
            end_functor: called with each block after its content.
            text_functor: called as ``text_functor(block, str(item))`` for
                every content item that is not a ``LatexEnvironment``; its
                return value replaces the item in ``block.content``.
                Defaults to returning the string unchanged.
        """
        if text_functor is None:
            def text_functor(b, c):
                return c

        if block is None:
            block = self._content

        if begin_functor is not None:
            begin_functor(block)

        for i, c in enumerate(block.content):
            if isinstance(c, LatexEnvironment):
                self.pathInBlock(c,
                                 begin_functor,
                                 end_functor,
                                 text_functor)
            else:
                block.content[i] = text_functor(block, str(c))

        if end_functor is not None:
            end_functor(block)
################################################################

Event Timeline