# -*- coding: utf-8 -*-
# Author: Andrew Jewett (jewett.aij at g mail)
# http://www.chem.ucsb.edu/~sheagroup
# License: 3-clause BSD License (See LICENSE.TXT)
# Copyright (c) 2012, Regents of the University of California
# All rights reserved.
"""A lexical analyzer class for simple shell-like syntaxes.
This version has been modified slightly to work better with unicode.
It was forked from the version of shlex that ships with python 3.2.2.
A few minor features and functions have been added. """
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# ("wordterminators" (unicode support) hack by Andrew Jewett September 2011)
import os.path
import sys
from collections import deque
import re, fnmatch
import string
#import gc
try:
from cStringIO import StringIO
except ImportError:
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
__all__ = ["TtreeShlex",
"split",
"LineLex",
"SplitQuotedString",
"EscCharStrToChar",
"SafelyEncodeString",
"RemoveOuterQuotes",
"MaxLenStr",
"HasWildCard",
#"IsRegex",
"InputError",
"ErrorLeader",
"SrcLoc",
"OSrcLoc",
"TextBlock",
"VarRef",
"VarNPtr",
"VarBinding",
"SplitTemplate",
"SplitTemplateMulti",
"TableFromTemplate",
"ExtractCatName",
#"_TableFromTemplate",
#"_DeleteLineFromTemplate",
"DeleteLinesWithBadVars",
"TemplateLexer"]
class TtreeShlex(object):
""" A lexical analyzer class for simple shell-like syntaxes.
TtreeShlex is a backwards-compatible version of python's standard shlex
module. It has the additional member: "self.wordterminators", which
overrides the "self.wordchars" member. This enables better handling of
unicode characters by allowing a much larger variety of characters to
appear in words or tokens parsed by TtreeShlex.
"""
custom_path = None
def __init__(self,
instream=None,
infile=None,
custom_include_path=None,
posix=False):
if isinstance(instream, str):
instream = StringIO(instream)
if instream is not None:
self.instream = instream
self.infile = infile
else:
self.instream = sys.stdin
self.infile = None
self.posix = posix
if posix:
self.eof = None
else:
self.eof = ''
self.commenters = '#'
self.wordchars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
if self.posix:
self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
self.wordterminators = set([]) #WORDTERMINATORS
self.prev_space_terminator = '' #WORDTERMINATORS
self.whitespace = ' \t\r\f\n'
self.whitespace_split = False
self.quotes = '\'"'
self.escape = '\\'
self.escapedquotes = '"'
self.state = ' '
self.pushback = deque()
self.lineno = 1
self.debug = 0
self.token = ''
self.filestack = deque()
# self.source_triggers
# are tokens which allow the seamless insertion of other
# files into the file being read.
self.source_triggers=set(['source'])
self.source_triggers_x=set([])
#Note: self.source_triggers_x
# This is a subset of self.source_triggers.
# In this case file inclusion is exclusive.
# In other words, if one of these tokens
# is encountered, the file is only included
# if it has not been included already.
self.source_files_restricted = set([])
self.include_path = []
if TtreeShlex.custom_path:
include_path_list = TtreeShlex.custom_path.split(':')
self.include_path += [d for d in include_path_list if len(d)>0]
if 'TTREE_PATH' in os.environ:
include_path_list = os.environ['TTREE_PATH'].split(':')
self.include_path += [d for d in include_path_list if len(d)>0]
if self.debug:
print('TtreeShlex: reading from %s, line %d' \
% (self.instream, self.lineno))
self.end_encountered = False
@staticmethod #WORDTERMINATORS
def _belongs_to(char, include_chars, exclude_chars): #WORDTERMINATORS
if ((not exclude_chars) or (len(exclude_chars)==0)): #WORDTERMINATORS
return char in include_chars #WORDTERMINATORS
else: #WORDTERMINATORS
return char not in exclude_chars #WORDTERMINATORS
def push_raw_text(self, text):
"""Push a block of text onto the stack popped by the ReadLine() method.
If multiple lines are present in the text (as determined by
self.line_terminators), then the text is split into multiple lines
and each one of them is pushed onto this stack individually.
The "self.lineno" counter is also adjusted, depending on the number
of newline characters in "text".
Do not strip off the newline or other line terminators
at the end of the text block before using push_raw_text()!
"""
if self.debug >= 1:
print("TtreeShlex: pushing token " + repr(text))
for c in reversed(text): #WORDTERMINATORS
self.pushback.appendleft(c) #WORDTERMINATORS
if c == '\n': #WORDTERMINATORS
self.lineno -= 1 #WORDTERMINATORS
if len(text) > 0: #WORDTERMINATORS
self.end_encountered = False #WORDTERMINATORS
def push_token(self, text):
"Push a token onto the stack popped by the get_token method"
self.push_raw_text(text+self.prev_space_terminator)
def push_source(self, newstream, newfile=None):
"Push an input source onto the lexer's input source stack."
if isinstance(newstream, str):
newstream = StringIO(newstream)
self.filestack.appendleft((self.infile, self.instream, self.lineno))
self.infile = newfile
self.instream = newstream
self.lineno = 1
if self.debug:
if newfile is not None:
print('TtreeShlex: pushing to file %s' % (self.infile,))
else:
print('TtreeShlex: pushing to stream %s' % (self.instream,))
def pop_source(self):
"Pop the input source stack."
self.instream.close()
(self.infile, self.instream, self.lineno) = self.filestack.popleft()
if self.debug:
print('TtreeShlex: popping to %s, line %d' \
% (self.instream, self.lineno))
self.state = ' '
def get_token(self):
"Get a token from the input stream (or from stack if it's nonempty)"
#### #CHANGING: self.pushback is now a stack of characters, not tokens #WORDTERMINATORS
#### if self.pushback: #WORDTERMINATORS
#### tok = self.pushback.popleft() #WORDTERMINATORS
#### if self.debug >= 1: #WORDTERMINATORS
#### print("TtreeShlex: popping token " + repr(tok)) #WORDTERMINATORS
#### return tok #WORDTERMINATORS
#### No pushback. Get a token. #WORDTERMINATORS
raw = self.read_token()
# Handle inclusions
if self.source_triggers is not None:
while raw in self.source_triggers:
fname=self.read_token()
spec = self.sourcehook(fname)
if spec:
(newfile, newstream) = spec
if ((raw not in self.source_triggers_x) or
(newfile not in self.source_files_restricted)):
self.push_source(newstream, newfile)
if raw in self.source_triggers_x:
self.source_files_restricted.add(newfile)
else:
if self.debug >= 0:
sys.stderr.write('\ndebug warning: duplicate attempt to import file:\n \"'+newfile+'\"\n')
raw = self.get_token()
# Maybe we got EOF instead?
while raw == self.eof:
if not self.filestack:
return self.eof
else:
self.pop_source()
raw = self.get_token()
# Neither inclusion nor EOF
if self.debug >= 1:
if raw != self.eof:
print("TtreeShlex: token=" + repr(raw))
else:
print("TtreeShlex: token=EOF")
if raw == self.eof: #WORDTERMINATORS
self.end_encountered = True #WORDTERMINATORS
return raw
def read_char(self):
if self.pushback: #WORDTERMINATORS
nextchar = self.pushback.popleft() #WORDTERMINATORS
assert((type(nextchar) is str) and (len(nextchar)==1)) #WORDTERMINATORS
else: #WORDTERMINATORS
nextchar = self.instream.read(1) #WORDTERMINATORS
return nextchar
def read_token(self):
self.prev_space_terminator = '' #WORDTERMINATORS
quoted = False
escapedstate = ' '
while True:
#### self.pushback is now a stack of characters, not tokens #WORDTERMINATORS
nextchar = self.read_char()
if nextchar == '\n':
self.lineno = self.lineno + 1
if self.debug >= 3:
print("TtreeShlex: in state", repr(self.state), \
"I see character:", repr(nextchar))
if self.state is None:
self.token = '' # past end of file
break
elif self.state == ' ':
if not nextchar:
self.state = None # end of file
break
elif nextchar in self.whitespace:
if self.debug >= 2:
print("TtreeShlex: I see whitespace in whitespace state")
if self.token or (self.posix and quoted):
# Keep track of which whitespace
# character terminated the token.
self.prev_space_terminator = nextchar #WORDTERMINATORS
break # emit current token
else:
continue
elif nextchar in self.commenters:
self.instream.readline()
self.lineno = self.lineno + 1
elif self.posix and nextchar in self.escape:
escapedstate = 'a'
self.state = nextchar
elif TtreeShlex._belongs_to(nextchar, #WORDTERMINATORS
self.wordchars, #WORDTERMINATORS
self.wordterminators):#WORDTERMINATORS
self.token = nextchar
self.state = 'a'
elif nextchar in self.quotes:
if not self.posix:
self.token = nextchar
self.state = nextchar
elif self.whitespace_split:
self.token = nextchar
self.state = 'a'
else:
self.token = nextchar
if self.token or (self.posix and quoted):
break # emit current token
else:
continue
elif self.state in self.quotes:
quoted = True
if not nextchar: # end of file
if self.debug >= 2:
print("TtreeShlex: I see EOF in quotes state")
# XXX what error should be raised here?
raise ValueError("Error at or before "+self.error_leader()+"\n"
" No closing quotation.")
if nextchar == self.state:
if not self.posix:
self.token = self.token + nextchar
self.state = ' '
break
else:
self.state = 'a'
elif self.posix and nextchar in self.escape and \
self.state in self.escapedquotes:
escapedstate = self.state
self.state = nextchar
else:
self.token = self.token + nextchar
elif self.state in self.escape:
if not nextchar: # end of file
if self.debug >= 2:
print("TtreeShlex: I see EOF in escape state")
# XXX what error should be raised here?
raise ValueError("No escaped character")
# In posix shells, only the quote itself or the escape
# character may be escaped within quotes.
if escapedstate in self.quotes and \
nextchar != self.state and nextchar != escapedstate:
self.token = self.token + self.state
self.token = self.token + nextchar
self.state = escapedstate
elif self.state == 'a':
if not nextchar:
self.state = None # end of file
break
elif nextchar in self.whitespace:
if self.debug >= 2:
print("TtreeShlex: I see whitespace in word state")
self.state = ' '
if self.token or (self.posix and quoted):
# Keep track of which whitespace
# character terminated the token.
self.prev_space_terminator = nextchar #WORDTERMINATORS
break # emit current token
else:
continue
elif nextchar in self.commenters:
comment_contents = self.instream.readline()
self.lineno = self.lineno + 1
if self.posix:
self.state = ' '
if self.token or (self.posix and quoted):
# Keep track of which character(s) terminated
# the token (including whitespace and comments).
self.prev_space_terminator = nextchar + comment_contents #WORDTERMINATORS
break # emit current token
else:
continue
elif self.posix and nextchar in self.quotes:
self.state = nextchar
elif self.posix and nextchar in self.escape:
escapedstate = 'a'
self.state = nextchar
elif (TtreeShlex._belongs_to(nextchar, #WORDTERMINATORS
self.wordchars, #WORDTERMINATORS
self.wordterminators)#WORDTERMINATORS
or (nextchar in self.quotes) #WORDTERMINATORS
or (self.whitespace_split)): #WORDTERMINATORS
self.token = self.token + nextchar
else:
self.pushback.appendleft(nextchar)
if self.debug >= 2:
print("TtreeShlex: I see punctuation in word state")
self.state = ' '
if self.token:
break # emit current token
else:
continue
result = self.token
self.token = ''
if self.posix and not quoted and result == '':
result = None
if self.debug > 1:
if result:
print("TtreeShlex: raw token=" + repr(result))
else:
print("TtreeShlex: raw token=EOF")
return result
def sourcehook(self, newfile):
"Hook called on a filename to be sourced."
newfile = RemoveOuterQuotes(newfile)
# This implements cpp-like semantics for relative-path inclusion.
if isinstance(self.infile, str) and not os.path.isabs(newfile):
newfile_full = os.path.join(os.path.dirname(self.infile), newfile)
try:
f = open(newfile_full, "r")
except IOError:
# If not found,
err = True
# ...then check to see if the file is in one of the
# directories in the self.include_path list.
for d in self.include_path:
newfile_full = os.path.join(d, newfile)
try:
f = open(newfile_full, "r")
err = False
break
except IOError:
err=True
if err:
raise InputError('Error at '+self.error_leader()+'\n'
' unable to open file \"'+newfile+'\"\n'
' for reading.\n')
return (newfile, f)
def error_leader(self, infile=None, lineno=None):
"Emit a C-compiler-like, Emacs-friendly error-message leader."
if infile is None:
infile = self.infile
if lineno is None:
lineno = self.lineno
return "\"%s\", line %d: " % (infile, lineno)
def __iter__(self):
return self
def __next__(self):
token = self.get_token()
if token == self.eof:
raise StopIteration
return token
def __bool__(self):
return not self.end_encountered
# For compatibility with python 2.x, I must also define:
def __nonzero__(self):
return self.__bool__()
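# Example usage of TtreeShlex (a minimal sketch; the exact token stream
# depends on the wordchars/wordterminators settings you choose):
#
#   lexer = TtreeShlex('mol = new Water')
#   lexer.wordterminators = set(' \t\r\f\n\'"\\#(){}=')
#   list(lexer)   # expected: ['mol', '=', 'new', 'Water']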
# The split() function was originally from shlex
# It is included for backwards compatibility.
def split(s, comments=False, posix=True):
lex = TtreeShlex(s, posix=posix)
lex.whitespace_split = True
if not comments:
lex.commenters = ''
return list(lex)
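# Example (mirrors the standard shlex.split() behavior):
#
#   split('install dir="/tmp/my files" -v')
#     expected: ['install', 'dir=/tmp/my files', '-v']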
##################### NEW ADDITIONS (may be removed later) #################
#"""
# -- linelex.py --
#linelex.py defines the LineLex class, which inherits from, and further
#augments the capabilities of TtreeShlex by making it easier to parse
#individual lines one at a time. (The original shlex's "source" inclusion
#ability still works when reading entire lines, and lines are still counted.)
#
#"""
#import sys
class InputError(Exception):
""" A generic exception object containing a string for error reporting.
(Raising this exception implies that the caller has provided
a faulty input file or argument.)
"""
def __init__(self, err_msg):
self.err_msg = err_msg
def __str__(self):
return self.err_msg
def __repr__(self):
return str(self)
def ErrorLeader(infile, lineno):
return '\"'+infile+'\", line '+str(lineno)
class SrcLoc(object):
""" SrcLoc is essentially nothing more than a 2-tuple containing the name
of a file (str) and a particular line number inside that file (an integer).
"""
__slots__=["infile","lineno"]
def __init__(self, infile='', lineno=-1):
self.infile = infile
self.lineno = lineno
def SplitQuotedString(string,
quotes='\'\"',
delimiters=' \t\r\f\n',
escape='\\',
comment_char='#'):
tokens = []
token = ''
reading_token = True
escaped_state = False
quote_state = None
for c in string:
if (c in comment_char) and (not escaped_state) and (quote_state==None):
tokens.append(token)
return tokens
elif (c in delimiters) and (not escaped_state) and (quote_state==None):
if reading_token:
tokens.append(token)
token = ''
reading_token = False
elif c in escape:
if escaped_state:
token += c
reading_token = True
escaped_state = False
else:
escaped_state = True
# and leave c (the '\' character) out of token
elif (c in quotes) and (not escaped_state):
if (quote_state != None):
if (c == quote_state):
quote_state = None
else:
quote_state = c
token += c
reading_token = True
else:
if (c == 'n') and (escaped_state == True):
c = '\n'
elif (c == 't') and (escaped_state == True):
c = '\t'
elif (c == 'r') and (escaped_state == True):
c = '\r'
elif (c == 'f') and (escaped_state == True):
c = '\f'
token += c
reading_token = True
escaped_state = False
if len(string) > 0:
tokens.append(token)
return tokens
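# Example (note that, unlike shlex-style splitting, the quotes are kept):
#
#   SplitQuotedString('a "b c" d')
#     expected: ['a', '"b c"', 'd']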
def EscCharStrToChar(s_in, escape='\\'):
"""
EscCharStrToChar() replaces any escape sequences
in a string with their 1-character equivalents.
"""
assert(len(escape) > 0)
out_lstr = []
escaped_state = False
for c in s_in:
if escaped_state:
if (c == 'n'):
out_lstr.append('\n')
elif (c == 't'):
out_lstr.append('\t')
elif (c == 'r'):
out_lstr.append('\r')
elif (c == 'f'):
out_lstr.append('\f')
elif (c == '\''):
out_lstr.append('\'')
elif (c == '\"'):
out_lstr.append('\"')
elif c in escape:
out_lstr.append(c)
else:
out_lstr.append(escape+c) # <- keep both characters
escaped_state = False
else:
if c in escape:
escaped_state = True
else:
out_lstr.append(c)
return ''.join(out_lstr)
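# Example:
#
#   EscCharStrToChar(r'column1\tcolumn2\n')
#     expected: 'column1\tcolumn2\n' (containing a real tab and newline)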
def SafelyEncodeString(in_str,
quotes='\'\"',
delimiters=' \t\r\f\n',
escape='\\',
comment_char='#'):
"""
SafelyEncodeString(in_str) scans through the input string (in_str),
and returns a new string in which problematic characters
(like newlines, tabs, quotes, etc.) are replaced by their two-character
backslashed equivalents (like '\n', '\t', '\'', '\"', etc.).
The escape character is the backslash by default, but it too can be
overridden to create custom escape sequences
(but this does not affect the encoding of characters like '\n', '\t').
"""
assert(len(escape) > 0)
out_lstr = []
use_outer_quotes = False
for c in in_str:
if (c == '\n'):
c = '\\n'
elif (c == '\t'):
c = '\\t'
elif (c == '\r'):
c = '\\r'
elif (c == '\f'):
c = '\\f'
elif c in quotes:
c = escape[0]+c
elif c in escape:
c = c+c
elif c in delimiters:
use_outer_quotes = True
# hmm... that's all that comes to mind. Did I leave anything out?
out_lstr.append(c)
if use_outer_quotes:
out_lstr = ['\"'] + out_lstr + ['\"']
return ''.join(out_lstr)
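# Example (roughly the inverse of EscCharStrToChar()):
#
#   SafelyEncodeString('two words\n')
#     expected: '"two words\\n"' (outer quotes added because of the space;
#     the newline becomes a two-character backslash+n sequence)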
def RemoveOuterQuotes(text, quotes='\"\''):
if ((len(text)>=2) and (text[0] in quotes) and (text[-1]==text[0])):
return text[1:-1]
else:
return text
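# Example:
#
#   RemoveOuterQuotes('"water.lt"')   expected: 'water.lt'
#   RemoveOuterQuotes('water.lt')     expected: 'water.lt' (unchanged)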
def MaxLenStr(s1, s2):
if len(s2) > len(s1):
return s2
else:
return s1
#def IsRegex(pat):
# """
# Check to see if string (pat) is bracketed by slashes.
#
# """
# return (len(pat)>=2) and (pat[0]=='/') and (pat[-1] == '/')
def HasWildCard(pat):
"""
Returns true if a string (pat) contains a '*' or '?' character.
"""
return (pat.find('*') != -1) or (pat.find('?') != -1)
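# Example:
#
#   HasWildCard('*.lt')       expected: True
#   HasWildCard('water.lt')   expected: False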
#def HasWildCard(pat):
# """
# Returns true if a string (pat) contains a non-backslash-protected
# * or ? character.
#
# """
# N=len(pat)
# i=0
# while i < N:
# i = pat.find('*', i, N)
# if i == -1:
# break
# elif (i==0) or (pat[i-1] != '\\'):
# return True
# i += 1
# i=0
# while i < N:
# i = pat.find('?', i, N)
# if i == -1:
# break
# elif (i==0) or (pat[i-1] != '\\'):
# return True
# i += 1
# return False
def MatchesPattern(s, pattern):
if type(pattern) is str:
#old code:
#if ((len(s) > 1) and (s[0] == '/') and (s[-1] == '/'):
# re_string = p[1:-1] # strip off the slashes '/' and '/'
# if not re.search(re_string, s):
# return False
#new code:
# uses precompiled regular expressions (See "pattern.search" below)
if HasWildCard(pattern):
if not fnmatch.fnmatchcase(s, pattern):
return False
elif s != pattern:
return False
else:
#assert(type(p) is _sre.SRE_Match)
# I assume pattern = re.compile(some_reg_expr)
if not pattern.search(s):
return False
return True
def MatchesAll(multi_string, pattern):
assert(len(multi_string) == len(pattern))
for i in range(0, len(pattern)):
if not MatchesPattern(multi_string[i], pattern[i]):
return False
return True
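# Examples (string patterns use glob syntax; anything else is assumed to
# be a precompiled regular expression, as noted in MatchesPattern() above):
#
#   MatchesPattern('water.lt', '*.lt')            expected: True
#   MatchesPattern('CA', re.compile('^C[AB]$'))   expected: True
#   MatchesAll(('atom', 'CA'), ('atom', 'C*'))    expected: True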
class LineLex(TtreeShlex):
""" This class extends the TtreeShlex module (a slightly modified
version of the python 3.2.2 version of shlex). LineLex has the
ability to read one line at a time (in addition to one token at a time).
(Many files and scripts must be parsed one line at a time instead of one
token at a time. In these cases, the whitespace position also matters.)
Arguably, this class might not be necessary.
I could get rid of this class completely. That would be nice. To do that
we would need to augment and generalize shlex's get_token() member function
to make it read lines, not just tokens. Of course, you can always
change the wordchars (or wordterminators). Even so, there are two other
difficulties using the current version of shlex.get_token() to read lines:
1) File inclusion happens whenever the beginning of a line/token matches one
of the "source_triggers" (not the whole line as required by get_token()).
2) Lines ending in a special character (by default the backslash character)
continue on to the next line.
This code seems to work on our test files, but I'm sure there are bugs.
Andrew 2012-3-25
"""
def __init__(self,
instream=None,
infile=None,
posix=False):
TtreeShlex.__init__(self, instream, infile, posix=posix)  # (keyword: the 3rd positional arg is custom_include_path)
self.line_terminators = '\n'
self.line_extend_chars = '\\'
self.skip_comments_during_readline = True
def _StripComments(self, line):
if self.skip_comments_during_readline:
for i in range(0, len(line)):
if ((line[i] in self.commenters) and
((i==0) or (line[i-1] not in self.escape))):
return line[:i]
return line
def _ReadLine(self,
recur_level=0):
"""
This function retrieves a block of text, halting at a
terminal character. Escape sequences are respected.
The self.lineno (newline counter) is also maintained.
The main difference between ReadLine() and get_token()
is the way they handle the "self.source_triggers" member.
Both Readline() and get_token() insert text from other files when they
encounter a string in "self.source_triggers" in the text they read.
However ReadLine() ONLY inserts text from other files if the token which
matches with self.source_triggers appears at the beginning of the line.
get_token() inserts text only if lex.source matches the entire token.
comment-to-self:
At some point, once I'm sure this code is working, I should replace
shlex.get_token() with the code from ReadLine() which is more general.
It would be nice to get rid of "class LineLex" entirely. ReadLine()
is the only new feature of LineLex that was lacking in shlex.
To do this I would need to add a couple optional arguments to
"get_token()", allowing it to mimic ReadLine(), such as:
"override_wordterms" argument (which we can pass a '\n'), and
"token_extender" argument (like '\' for extending lines)
"""
first_token=''
line = ''
escaped_state = False
found_space = False
while True:
nextchar = self.read_char()
#sys.stderr.write('nextchar=\"'+nextchar+'\"\n')
while nextchar == '':
if not self.filestack:
return self._StripComments(line), '', first_token, found_space
else:
self.pop_source()
nextchar = self.read_char()
if nextchar == '\n':
self.lineno += 1
if escaped_state:
escaped_state = False
else:
if nextchar in self.escape:
line += nextchar
escaped_state = True
else:
escaped_state = False
if not escaped_state:
if (nextchar in self.whitespace):
found_space = True
while first_token in self.source_triggers:
fname = RemoveOuterQuotes(self.get_token())
if (fname == '') or (fname in self.source_triggers):
raise InputError('Error: near '+self.error_leader()+'\n'
' Nonsensical file inclusion request.\n')
if self.debug >= 0:
sys.stderr.write( (' ' * recur_level) +
'reading file \"'+fname+'\"\n')
spec = self.sourcehook(fname)
if spec:
(fname, subfile) = spec
if ((first_token not in self.source_triggers_x) or
(fname not in self.source_files_restricted)):
self.push_source(subfile, fname)
if first_token in self.source_triggers_x:
self.source_files_restricted.add(fname)
else:
if self.debug >= 0:
sys.stderr.write('\nWarning at '+self.error_leader()+':\n'
' duplicate attempt to import file:\n \"'+fname+'\"\n')
line, nextchar, first_token, found_space = \
self._ReadLine(recur_level+1)
if nextchar in self.line_terminators:
line_nrw = line.rstrip(self.whitespace)
#sys.stderr.write('line_nrw=\"'+line_nrw+'\"\n')
if ((len(line_nrw) > 0) and
(line_nrw[-1] in self.line_extend_chars) and
((len(line_nrw) < 2) or (line_nrw[-2] not in self.escape))):
line = line_nrw[:-1] #delete the line_extend character
# from the end of that line and keep reading...
else:
return self._StripComments(line), nextchar, first_token, found_space
else:
line += nextchar
if not found_space:
first_token += nextchar
def ReadLine(self, recur_level=0):
line, nextchar, first_token, found_space = \
self._ReadLine(recur_level)
if nextchar == self.eof:
self.end_encountered = True
return line + nextchar
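# Example of ReadLine() (a sketch; backslash-continued lines are joined,
# and the terminating newline is kept):
#
#   lex = LineLex('line1\\\n continued\nline2\n')
#   lex.ReadLine()   expected: 'line1 continued\n'
#   lex.ReadLine()   expected: 'line2\n'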
@staticmethod
def TextBlock2Lines(text, delimiters, keep_delim=True):
""" This splits a string into a list of sub-strings split by delimiter
characters. This function is different from the standard str.split()
function: The string is split at every character which belongs to the
"delimiters" argument (which can be a string or some other container).
This character is included at the end of every substring. Example:
TextBlock2Lines('\nabc\nde^fg\nhi j\n', '^\n')
returns:
['\n', 'abc\n', 'de^', 'fg\n', 'hi j\n']
"""
ls = []
i = 0
i_prev = 0
while i < len(text):
if text[i] in delimiters:
if keep_delim:
ls.append(text[i_prev:i+1])
else:
ls.append(text[i_prev:i])
i_prev = i+1
i += 1
if (i_prev < len(text)):
ls.append(text[i_prev:i+1])
return ls
def __iter__(self):
return self
def __next__(self):
line = self.ReadLine()
if line == self.eof:
raise StopIteration
return line
class OSrcLoc(object):
""" OSrcLoc is barely more than a 2-tuple containing the name of a file
(a string) and a particular line number inside that file (an integer).
These objects are passed around and stored in the nodes of
every tree, so that if a syntax error or broken link in that node
is discovered, an error message can be provided to the user.
"order"
Later on, during development, the "order" member was added. Why:
If you want to know whether a block of text comes before or after a
different block of text, unfortunately you can not just compare the
corresponding line numbers of the files they come from because the
files may differ, and multiple short blocks of text may occupy the
same line. Consequently, "OSrcLoc" also maintains an internal
counter which keeps track of how many OSrcLoc() objects have been
created so far. (This can be useful if the user requests that
variables and commands be assigned in a non-standard order.)
The "order" member is assigned to this counter.
Most of the time, the "order" member can be ignored.
"""
__slots__=["infile","lineno","order"]
count = 0
def __init__(self, infile='', lineno=-1):
self.infile = infile
self.lineno = lineno
OSrcLoc.count += 1
self.order = OSrcLoc.count
def __lt__(self, x):
return self.order < x.order
#def __repr__(self):
# return repr((self.infile, self.lineno, self.order))
class TextBlock(object):
"""TextBlock is just a 3-tuple consisting of a string, and an OSrcLoc
to help locate it in the original file from which it was read."""
__slots__=["text","srcloc"]
def __init__(self, text, srcloc): #srcloc_end):
self.text = text
if srcloc == None:
self.srcloc = OSrcLoc()
else:
self.srcloc = srcloc
#if srcloc_end == None:
# self.srcloc_end = OSrcLoc()
#else:
# self.srcloc_end = srcloc_end
def __repr__(self):
return '\"'+self.text+'\"'
class VarRef(object):
"""VarRef stores variable names, and paths, and other attribute information,
as well as a "OSrcLoc" to keep track of the file it was defined in."""
__slots__=["prefix","descr_str","suffix","srcloc","binding","nptr"]
def __init__(self,
prefix = '', # '$' or '${'
descr_str = '', # <- descriptor string: "cpath/category:lpath"
suffix = '', # '}'
srcloc = None,# location in file where defined
binding = None,# a pointer to a tuple storing the value
nptr = None):# <- see class VarNPtr
self.prefix = prefix #Any text before the descriptor string goes here
self.suffix = suffix #Any text after the descriptor string goes here
self.descr_str = descr_str
if srcloc == None: # <- Location in text file where variable appears
self.srcloc = OSrcLoc()
else:
self.srcloc = srcloc
self.binding = binding
if nptr == None:
self.nptr = VarNPtr()
else:
self.nptr = nptr
def __lt__(self, x):
# ("order" is not among this class's __slots__; compare the srcloc order counters)
return self.srcloc.order < x.srcloc.order
#def __repr__(self):
# return repr((self.prefix + self.descr_str + self.suffix, srcloc))
class VarNPtr(object):
"""
Every time a variable appears in a template, it has has a "descritpor".
For example, consider the variable
"$atom:CA"
This is a string which encodes 3 pieces of information.
1) the category name: This essentially indicates the variable's type.
(ie "atom", in the example above)
2) the category node: Some TYPES have limited scope. Users can
specify the root node of the portion of the tree
in which this variable's type makes sense.
If this node is the root node, then that category
is relevant everywhere, and is not molecule or class
specific. (All variables have a category node, which
is often not explicitly defined by the user;
it must be inferred/determined.)
(Category node = the root "/", in the example above.)
3) the leaf node: This is a node whose ".name" member matches the name
of a variable. This node is created for this purpose
and its position in the tree is a reflection of
that variable's intended scope.
In a molecule this "name" might be the name
of a type of atom, or an atom ID, or a bond type,
which is found in a particular molecule.
(Leaf node would be named "CA" in the example above.)
The VarNPtr class is simply a 3-tuple which
keeps these 3 pieces of data together.
"""
__slots__=["cat_name","cat_node","leaf_node"]
def __init__(self, cat_name='', cat_node=None, leaf_node=None):
self.cat_name = cat_name
self.cat_node = cat_node
self.leaf_node = leaf_node
#def __repr__(self):
# return repr((self.cat_name, self.cat_node.name, self.leaf_node.name))
class VarBinding(object):
""" VarBinding is essentially a tuple consistng of (full_name, binding, refs):
"self.full_name" is canonical name for this variable. This is a string
which specifies full path leading to the category node (beginning with '/'),
the category name (followed by a ':'),
as well as the leaf node (including the path leading up to it from cat_node)
This triplet identifies the variable uniquely.
"self.value" is the data that the variable refers to (usually a string).
"self.refs" stores a list of VarRefs which mention the same variable
from the various places inside various templates in the tree.
"""
__slots__=["full_name","nptr","value","refs","order","category"]
def __init__(self,
full_name = '',
nptr = None,
value = None,
refs = None,
order = None,
category = None):
self.full_name = full_name
self.nptr = nptr
self.value = value
self.refs = refs
self.order = order
self.category = category
def __lt__(self, x):
return self.order < x.order
def __repr__(self):
return repr((self.full_name, self.value, self.order))
def ExtractCatName(descr_str):
""" When applied to a VarRef's "descr_str" member,
this function will extract the "catname" of it's corresponding
"nptr" member. This can be useful for error reporting.
(I use it to insure that the user is using the correct counter
variable types at various locations in their input files.)
"""
ib = descr_str.find(':')
if ib == -1:
ib = len(descr_str)
ia = descr_str.rfind('/')
if ia == -1:
ia = 0
return descr_str[ia:ib]
else:
str_before_colon = descr_str[0:ib]
ia = str_before_colon.rfind('/')
if ia == -1:
return str_before_colon
else:
return str_before_colon[ia+1:]
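# Example:
#
#   ExtractCatName('atom:CA')        expected: 'atom'
#   ExtractCatName('/mol/res:xyz')   expected: 'res'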
def _DeleteLineFromTemplate(tmpl_list,
i_entry, # index into tmpl_list
newline_delimiter='\n'):
""" Delete a single line from tmpl_list.
tmpl_list is an alternating list of VarRefs and TextBlocks.
To identify the line, the index corresponding to one of the
entries in the tmpl_list is used. (Usually it is a VarRef)
The text after the preceding newline, and the text up to the next newline
(starting from the beginning of the current entry, if a TextBlock)
is deleted, including any VarRef (variables) located in between.
It returns the index corresponding to the next
entry in the list (after deletion).
"""
i_prev_newline = i_entry
while i_prev_newline >= 0:
entry = tmpl_list[i_prev_newline]
if isinstance(entry, TextBlock):
i_char_newline = entry.text.rfind(newline_delimiter)
if i_char_newline != -1: # then newline found
# Delete the text after this newline
entry.text = entry.text[:i_char_newline+1]
break
i_prev_newline -= 1
first_var = True
#i_next_newline = i_entry
i_next_newline = i_prev_newline+1
while i_next_newline < len(tmpl_list):
entry = tmpl_list[i_next_newline]
if isinstance(entry, TextBlock):
i_char_newline = entry.text.find(newline_delimiter)
if i_char_newline != -1: # then newline found
# Delete the text before this newline (including the newline)
entry.text = entry.text[i_char_newline+1:]
break
# Invoke DeleteSelf() on the first variable on this line. This will
# ensure that it is deleted from the ttree_assignments.txt file.
elif isinstance(entry, VarRef):
if first_var:
entry.nptr.leaf_node.DeleteSelf()
first_var = False
i_next_newline += 1
del tmpl_list[i_prev_newline + 1 : i_next_newline]
return i_prev_newline + 1
def DeleteLinesWithBadVars(tmpl_list,
delete_entire_template = False,
newline_delimiter = '\n'):
"""
Loop through the entries in a template,
an alternating list of TextBlocks and VarRefs (tmpl_list).
If a VarRef points to a leaf_node which no longer exists
(ie. is no longer in the corresponding category's .bindings list),
then delete the line containing it from the template (tmpl_list).
"""
out_str_list = []
i = 0
while i < len(tmpl_list):
entry = tmpl_list[i]
if isinstance(entry, VarRef):
var_ref = entry
var_bindings = var_ref.nptr.cat_node.categories[var_ref.nptr.cat_name].bindings
#if var_ref.nptr.leaf_node not in var_bindings:
if var_ref.nptr.leaf_node.IsDeleted():
if delete_entire_template:
del tmpl_list[:]
return 0
else:
i = _DeleteLineFromTemplate(tmpl_list,
i,
newline_delimiter)
else:
i += 1
else:
i += 1
def SplitTemplate(ltmpl, delim, delete_blanks = False):
"""
Split a template "ltmpl" into a list of "tokens" (sub-templates)
using a single delimiter string "delim".
INPUT arguments:
"ltmpl" should be an list of TextBlocks and VarRefs.
"delim" should be a simple string (type str)
"delete_blanks" should be a boolean True/False value.
When true, successive occurrences of the delimiter
should not create blank entries in the output list.
OUTPUT:
A list of tokens.
Each "token" is either a TextBlock, a VarRef,
or a (flat, 1-dimensional) list containing more than one of these objects.
The number of "tokens" returned equals the number of times the delimiter
is encountered in any of the TextBlocks in the "ltmpl" argument, plus one.
(... Unless "delete_blanks" is set to True.
Again, in that case, empty entries in this list are deleted.)
"""
assert(type(delim) is str)
if not hasattr(ltmpl, '__len__'):
ltmpl = [ltmpl]
tokens_lltmpl = []
token_ltmpl = []
i = 0
while i < len(ltmpl):
entry = ltmpl[i]
if isinstance(entry, TextBlock):
#if hasattr(entry, 'text'):
prev_src_loc = entry.srcloc
tokens_str = entry.text.split(delim)
lineno = entry.srcloc.lineno
j = 0
while j < len(tokens_str):
token_str = tokens_str[j]
delim_found = False
if (j < len(tokens_str)-1):
delim_found = True
if token_str == '':
if delete_blanks:
if delim == '\n':
lineno += 1
if len(token_ltmpl) > 0:
if len(token_ltmpl) == 1:
tokens_lltmpl.append(token_ltmpl[0])
else:
tokens_lltmpl.append(token_ltmpl)
del token_ltmpl
token_ltmpl = []
j += 1
continue
new_src_loc = OSrcLoc(prev_src_loc.infile, lineno)
new_src_loc.order = prev_src_loc.order
for c in token_str:
# Reminder to self: c != delim (so c!='\n' if delim='\n')
# (We keep track of '\n' characters in delimiters above.)
if c == '\n':
lineno +=1
new_src_loc.lineno = lineno
text_block = TextBlock(token_str,
new_src_loc)
prev_src_loc = new_src_loc
if len(token_ltmpl) == 0:
if delim_found:
tokens_lltmpl.append(text_block)
del token_ltmpl
token_ltmpl = []
else:
token_ltmpl.append(text_block)
else:
if delim_found:
if len(token_str) > 0:
token_ltmpl.append(text_block)
tokens_lltmpl.append(token_ltmpl)
del token_ltmpl
token_ltmpl = []
else:
assert(not delete_blanks)
if (isinstance(token_ltmpl[-1], VarRef)
and
((j>0)
or
((j == len(tokens_str)-1) and
(i == len(ltmpl)-1))
)):
# In that case, this empty token_str corresponds
# to a delimiter which was located immediately
# after the variable name,
# AND
# -there is more text to follow,
# OR
# -we are at the end of the template.
token_ltmpl.append(text_block)
if len(token_ltmpl) == 1:
tokens_lltmpl.append(token_ltmpl[0])
else:
tokens_lltmpl.append(token_ltmpl)
del token_ltmpl
token_ltmpl = []
else:
token_ltmpl.append(text_block)
if (delim_found and (delim == '\n')):
lineno += 1
j += 1
elif isinstance(entry, VarRef):
#elif hasattr(entry, 'descr_str'):
lineno = entry.srcloc.lineno
if ((len(token_ltmpl) == 1) and
isinstance(token_ltmpl[0], TextBlock) and
(len(token_ltmpl[0].text) == 0)):
# special case: if the previous entry was "", then it means
# the delimiter appeared at the end of the previous text block
# leading up to this variable. It separates the variable from
# the previous text block. It is not a text block of length 0.
token_ltmpl[0] = entry
else:
token_ltmpl.append(entry)
elif entry == None:
token_ltmpl.append(entry)
else:
assert(False)
i += 1
# Append left over remains of the last token
if len(token_ltmpl) == 1:
tokens_lltmpl.append(token_ltmpl[0])
elif len(token_ltmpl) > 1:
tokens_lltmpl.append(token_ltmpl)
del token_ltmpl
return tokens_lltmpl
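# Example of SplitTemplate() (a sketch):
#
#   tb = TextBlock('a b', OSrcLoc('file.txt', 1))
#   SplitTemplate([tb], ' ')
#     expected: two TextBlocks, "a" and "b", each with an updated OSrcLoc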
def SplitTemplateMulti(ltmpl, delims, delete_blanks=False):
"""
Split a template "ltmpl" into a list of templates using a
single one or more delimiter strings "delim_list".
If multiple delimiter strings are provided, splitting
begins using the first delimiter string in the list.
Then each token in the resulting list of templates
is split using the next delimiter string
and so on until we run out of delimiter strings.
"ltmpl" should be an list of TextBlocks and VarRefs.
"delims" should be a simple string (type str) or a list of strings
"delete_blanks" is either True or False
If True, then any blank entries in the resulting list of
tokens (sub-templates) will be deleted.
"""
# (test with isinstance(): strings also have __len__, and a plain string
#  passed here should be treated as ONE delimiter, not as a list of chars)
if isinstance(delims, str):
    delim_list = [delims]
else:
    delim_list = delims   # hopefully a list (or tuple) of delimiter strings
tokens = [ltmpl]
for delim in delim_list:
assert(type(delim) is str)
tokens_il = []
for t in tokens:
sub_tokens = SplitTemplate(t, delim, delete_blanks)
for st in sub_tokens:
if hasattr(st, '__len__'):
if (len(st) > 0) or (not delete_blanks):
tokens_il.append(st)
else:
tokens_il.append(st)
tokens = tokens_il
del tokens_il
return tokens
def _TableFromTemplate(d, ltmpl, delimiters, delete_blanks):
"""
See the docstring for the TableFromTemplate() function for an explanation.
(This _TableFromTemplate() and SplitTemplate() are the workhorse functions
for TableFromTemplate().)
"""
output = SplitTemplateMulti(ltmpl, delimiters[d], delete_blanks[d])
if d > 0:
i = 0
while i < len(output):
output[i] = _TableFromTemplate(d-1,
output[i],
delimiters,
delete_blanks)
# Delete empty LISTS?
if (delete_blanks[d] and
hasattr(output[i], '__len__') and
(len(output[i]) == 0)):
del output[i]
else:
i += 1
return output
def TableFromTemplate(ltmpl, delimiters, delete_blanks=True):
"""
This function can be used to split a template
(a list containing TextBlocks and VarRefs)
into a multidimensional table, with an arbitrary number of dimensions.
Arguments:
ltmpl
An alternating list of TextBlocks and VarRefs containing
the contents of this text template.
delimiters
The user must supply a list or tuple of delimiters: one delimiter for
each dimension in the table, with low-priority delimiters
(such as spaces ' ') appearing first, and higher-priority delimiters
(such as newlines '\n') appearing later on in the list.
This function will divide the entire "ltmpl" into an n-dimensional
table. Initially the text is split into a list of text using the
highest-priority delimiter. Then each entry in the resulting list is
split into another list according to the next highest-priority delimiter.
This continues until all of the delimiters are used up and an
n-dimensional list-of-lists is remaining.
delete_blanks
The optional "delete_blanks" argument can be used to indicate whether
or not to delete blank entries in the table (which occur as a result
of placing two delimiters next to each other). It should be either
None (default), or it should be an array of booleans matching the
size of the "delimiters" argument. This allows the caller to customize
the merge settings separately for each dimension (for example: to allow
merging of whitespace within a line, without ignoring blank lines).
---- Details: ----
1) Multi-character delimiters ARE allowed (like '\n\n').
2) If a delimiter in the "delimiters" argument is not a string
but is a tuple (or a list) of strings, then the text is split according
to any of the delimiters in that tuple/list (starting from the last entry).
This way, users can use this feature to split text according to multiple
different kinds of whitespace characters (such as ' ' and '\t'), for
example, by setting delimiters[0] = (' ','\t'). If, additionally,
delete_blanks[0] == True, then this will cause this function to
divide text without regard to whitespace on a given line (for example).
Detailed example:
table2D = TableFromTemplate(ltmpl,
delimiters = ((' ','\t'), '\n'),
delete_blanks = (True, False))
This divides text in a similar way that the "awk" program does by default,
ie, by ignoring various kinds of whitespace between text fields, but NOT
ignoring blank lines.
3) Any text contained in variable-names is ignored.
"""
# Make a copy of ltmpl
# (The workhorse function "_TableFromTemplate()" makes in-place changes to
# its "ltmpl" argument. I don't want to modify "ltmpl", so I make a copy
# of it before I invoke "_TableFromTemplate()" on it.)
output = [ltmpl[i] for i in range(0, len(ltmpl))]
d = len(delimiters) - 1
output = _TableFromTemplate(d, output, delimiters, delete_blanks)
return output
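# Example (a sketch of the awk-like settings described in the docstring):
#
#   tb = TextBlock('1 2\n3\t4', OSrcLoc('data.txt', 1))
#   TableFromTemplate([tb], ((' ', '\t'), '\n'), (True, False))
#     expected: [["1", "2"], ["3", "4"]]  (a 2-D list of TextBlocks)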
class TemplateLexer(TtreeShlex):
""" This class extends the standard python lexing module, shlex, adding a
new member function (ReadTemplate()), which can read in a block of raw text,
(halting at a (non-escaped) terminal character), and split the text into
alternating blocks of text and variables. (As far as this lexer is
concerned, "variables" are simply tokens preceeded by $ or @ characters,
and surrounded by optional curly-brackets {}.)
"""
def __init__(self,
instream=None,
infile=None,
posix=False):
TtreeShlex.__init__(self, instream, infile, posix=posix)  # (keyword: the 3rd positional arg is custom_include_path)
self.var_delim = '$@' #characters which can begin a variable name
self.var_open_paren = '{' #optional parenthesis surround a variable
self.var_close_paren = '}' #optional parenthesis surround a variable
self.newline = '\n'
self.comment_skip_var = '#'
# Which characters belong in words?
#
# We want to allow these characters:
# ./$@&%^!*~`-_:;?<>[]()
# to appear inside the tokens that TtreeShlex.get_token()
# retrieves (TtreeShlex.get_token() is used to read class
# names, and instance names, and variable names)
#
# settings.lex.wordchars+='./$@&%^!*~`-_+:;?<>[]' #Allow these chars
#
# Omissions:
# Note: I left out quotes, whitespace, comment chars ('#'), and escape
# characters ('\\') because they are also dealt with separately.
# Those characters should not overlap with settings.lex.wordchars.
#
# Enabling unicode support requires that we override this choice
# by specifying "lex.wordterminators" instead of "wordchars".
#
# lex.wordterminators should be the (printable) set inverse of lex.wordchars
# I'm not sure which ascii characters are NOT included in the string above
# (We need to figure that out, and put them in settings.lex.wordterminators)
# To figure that out, uncomment the 8 lines below:
#
#self.wordterminators=''
#for i in range(0,256):
# c = chr(i)
# if c not in self.wordchars:
# self.wordterminators += c
#sys.stderr.write('-------- wordterminators = --------\n')
#sys.stderr.write(self.wordterminators+'\n')
#sys.stderr.write('-----------------------------------\n')
#
# Here is the result:
self.wordterminators = '(),={|}' + \
self.whitespace + \
self.quotes + \
self.escape + \
self.commenters
# Note:
# self.whitespace = ' \t\r\f\n'
# self.quotes = '\'"'
# self.escape = '\\'
# self.commenters = '#'
self.source_triggers=set(['include','import'])
self.source_triggers_x=set(['import'])
def GetSrcLoc(self):
return OSrcLoc(self.infile, self.lineno)
def ReadTemplate(self,
simplify_output=False,
terminators='}',
other_esc_chars='{',
keep_terminal_char = True):
"""
ReadTemplate() reads a block of text (between terminators)
and divides it into variables (tokens following a '$' or '@' character)
and raw text. This is similar to python's string.Template(),
however it reads from streams (files), not strings, and it allows use
of more complicated variable names with multiple variable delimiters
(eg '$' and '@').
This readline()-like member function terminates when reaching a
user-specified terminator character (second argument),
or when a variable (eg: "$var") is encountered. The result is
a list of variable-separated text-blocks (which is returned
to the caller). For example, the string:
"string with $var1 and $var2 variables.}" contains:
"string with ",
$var1,
" and ",
$var2,
" variables.}"
This simplifies the final process of rendering
(substituting text into) the text blocks later on.
Output:
This function returns a list of (alternating) blocks of
text, and variable names. Each entry in the list is either:
1) a text block:
Raw text is copied from the source, verbatim, along with
some additional data (filename and line numbers), to
help retroactively identify where the text came from
(in case a syntax error in the text is discovered later).
In this case, the list entry is stored as a list.
The format (TextBlock) is similar to:
[text_string, ((filenameA,lineBegin), (filenameB,lineEnd))],
where the tuples, (filenameA,lineBegin) and (filenameB,lineEnd)
denote the source file(s) from which the text was read, and
line number at the beginning and ending of the text block.
(This information is useful for generating helpful error
messages. Note that the "TtreeShlex" class allows users to
combine multiple files transparently into one stream using
the "source" (or "sourcehook()") member. For this reason, it
is possible, although unlikely, that the text-block
we are reading could span multiple different files.)
2) a variable (for example "$var" or "${var}"):
In this case, the list entry is stored in the "VarRef" format
which is essentially shown below:
[[var_prefix, var_nptr, var_suffix], (filename,lineno)]
where var_prefix and var_suffix are strings containing brackets
and other text enclosing the variable name (and may be empty).
As an example, we consider a file named "datafile" which
contains this text (with 2 text blocks and 1 variable):
"some\n text\n before ${var}. Text after\n".
ReadTemplate() will read this and return a list with 3 entries:
[ ['some\n text\n before', (('datafile', 1), ('datafile', 3))],
[['${', 'var', '}'], ('datafile', 3, 3)],
['Text after\n', (('datafile', 3), ('datafile', 4))] ]
Note that while parsing the text, self.lineno counter is
incremented whenever a newline character is encountered.
(Also: Unlike shlex.get_token(), this function does not
delete commented text, or insert text from other files.)
Exceptional Cases:
Terminator characters are ignored if they are part of a variable
reference. (For example, the '}' in "${var}", is used to denote a
bracketed variable, and does not cause ReadTemplate() to stop reading)
OR if they are part of a two-character escape sequence
(for example, '}' in "\}" does not cause terminate parsing).
In that case, the text is considered normal text. (However the
'\' character is also stripped out. It is also stripped out if it
preceeds any characters in "other_esc_chars", which is
the second argument. Otherwise it is left in the text block.)
"""
#print(' ReadTemplate('+terminators+') invoked at '+self.error_leader())
# The main loop of the parser reads only one variable at time.
# The following variables keep track of where we are in the template.
reading_var=False # Are we currently reading in the name of a variable?
prev_char_delim=False #True iff we just read a var_delim character like '$'
escaped_state=False #True iff we just read a (non-escaped) esc character '\'
commented_state=False #True iff we are in a region of text where vars should be ignored
var_paren_depth=0 # This is non-zero iff we are inside a
# bracketed variable's name for example: "${var}"
var_terminators = self.whitespace + self.newline + self.var_delim + '{}'
tmpl_list = [] # List of alternating tuples of text_blocks and
# variable names (see format comment above)
# This list will be returned to the caller.
#sys.stderr.write('report_progress='+str(report_progress))
prev_filename = self.infile
prev_lineno = self.lineno
var_prefix = ''
var_descr_plist = []
var_suffix = ''
text_block_plist = []
done_reading = False
while not done_reading:
terminate_text = False
terminate_var = False
#delete_prior_escape = False
nextchar = self.read_char()
#print(' ReadTemplate() nextchar=\''+nextchar+'\' at '+self.error_leader()+' esc='+str(escaped_state)+', pvar='+str(prev_char_delim)+', paren='+str(var_paren_depth))
# Count newlines:
if nextchar in self.newline:
commented_state = False
self.lineno += 1
elif ((nextchar in self.comment_skip_var) and
(not escaped_state)):
commented_state = True
# Check for end-of-file:
if nextchar == '':
if escaped_state:
raise InputError('Error: in '+self.error_leader()+'\n\n'
'No escaped character.')
if reading_var:
terminate_var = True
else:
terminate_text = True
done_reading = True
# --- Now process the character: ---
# What we do next depends on which "mode" we are in.
# If we are reading a regular text block (reading_var == False),
# then we keep appending characters onto the end of "text_block",
# checking for terminal characters, or variable delimiters.
# If we are reading a variable name (reading_var == True),
# then we append characters to the end of "var_descr_plist[]",
# checking for variable terminator characters, as well as
# parenthesis (some variables are surrounded by parenthesis).
elif reading_var:
if nextchar in terminators:
#sys.stdout.write(' ReadTemplate() readmode found terminator.\n')
if escaped_state:
# In this case, the '\' char was only to prevent terminating
# string prematurely, so delete the '\' character.
#delete_prior_escape = True
if not (nextchar in self.var_close_paren):
del var_descr_plist[-1]
var_descr_plist.append(nextchar)
elif not ((var_paren_depth>0) and (nextchar in self.var_close_paren)):
terminate_var = True
done_reading = True
if nextchar in self.var_open_paren: # eg: nextchar == '{'
#sys.stdout.write(' ReadTemplate() readmode found {.\n')
if escaped_state:
# In this case, the '\' char was only to prevent
# interpreting '{' as a variable prefix
#delete_prior_escape=True # so delete the '\' character
del var_descr_plist[-1]
var_descr_plist.append(nextchar)
else:
# "${var}" is a valid way to refer to a variable
if prev_char_delim:
var_prefix += nextchar
var_paren_depth = 1
# "${{var}}" is also a valid way to refer to a variable,
# (although strange), but "$va{r}" is not.
# Parenthesis (in bracketed variable names) must
# immediately follow the '$' character (as in "${var}")
elif var_paren_depth > 0:
var_paren_depth += 1
elif nextchar in self.var_close_paren:
#sys.stdout.write(' ReadTemplate() readmode found }.\n')
if escaped_state:
# In this case, the '\' char was only to prevent
# interpreting '}' as a variable suffix,
#delete_prior_escape=True #so skip the '\' character
if (nextchar not in terminators):
del var_descr_plist[-1]
var_descr_plist.append(nextchar)
else:
if var_paren_depth > 0:
var_paren_depth -= 1
if var_paren_depth == 0:
var_suffix = nextchar
terminate_var = True
elif nextchar in var_terminators:
#sys.stdout.write(' ReadTemplate() readmode found var_terminator \"'+nextchar+'\"\n')
if (escaped_state or (var_paren_depth>0)):
# In this case, the '\' char was only to prevent
# interpreting nextchar as a variable terminator
#delete_prior_escape = True # so skip the '\' character
del var_descr_plist[-1]
var_descr_plist.append(nextchar)
else:
terminate_var = True
elif nextchar in self.var_delim: # such as '$'
#sys.stdout.write(' ReadTemplate() readmode found var_delim.\n')
if escaped_state:
# In this case, the '\' char was only to prevent
# interpreting '$' as a new variable name
#delete_prior_escape = True # so skip the '\' character
del var_descr_plist[-1]
var_descr_plist.append(nextchar)
else:
prev_var_delim = True
# Then we are processing a new variable name
terminate_var = True
else:
var_descr_plist.append(nextchar)
prev_char_delim = False
else: # begin else clause for "if reading_var:"
# Then we are reading a text_block
if nextchar in terminators:
if escaped_state:
# In this case, the '\' char was only to prevent terminating
# string prematurely, so delete the '\' character.
#delete_prior_escape = True
del text_block_plist[-1]
text_block_plist.append(nextchar)
elif commented_state:
text_block_plist.append(nextchar)
else:
terminate_text = True
done_reading = True
elif nextchar in self.var_delim: # such as '$'
if escaped_state:
# In this case, the '\' char was only to prevent
# interpreting '$' as a variable prefix.
#delete_prior_escape=True #so delete the '\' character
del text_block_plist[-1]
text_block_plist.append(nextchar)
elif commented_state:
text_block_plist.append(nextchar)
else:
prev_char_delim = True
reading_var = True
var_paren_depth = 0
terminate_text = True
else:
text_block_plist.append(nextchar)
#TO DO: use "list_of_chars.join()" instead of '+='
prev_char_delim = False # the previous character was not '$'
# Now deal with "other_esc_chars"
#if escaped_state and (nextchar in other_esc_chars):
if escaped_state and (nextchar in other_esc_chars):
if reading_var:
#sys.stdout.write(' ReadTemplate: var_descr_str=\''+''.join(var_descr_plist)+'\'\n')
assert(var_descr_plist[-2] in self.escape)
del var_descr_plist[-2]
else:
#sys.stdout.write(' ReadTemplate: text_block=\''+''.join(text_block_plist)+'\'\n')
assert(text_block_plist[-2] in self.escape)
del text_block_plist[-2]
if terminate_text:
#sys.stdout.write('ReadTemplate() appending: ')
#sys.stdout.write(text_block)
#tmpl_list.append( [text_block,
# ((prev_filename, prev_lineno),
# (self.infile, self.lineno))] )
if simplify_output:
tmpl_list.append(''.join(text_block_plist))
else:
tmpl_list.append(TextBlock(''.join(text_block_plist),
OSrcLoc(prev_filename, prev_lineno)))
#, OSrcLoc(self.infile, self.lineno)))
if not done_reading:
# The character that ended the text block
# was a variable delimiter (like '$'), in which case
# we should put it (nextchar) in the variable's prefix.
var_prefix = nextchar
else:
var_prefix = ''
var_descr_plist = []
var_suffix = ''
prev_filename = self.infile
prev_lineno = self.lineno
del text_block_plist
text_block_plist = []
#gc.collect()
elif terminate_var:
# Print an error if we terminated in the middle of
# an incomplete variable name:
if prev_char_delim:
raise InputError('Error: near '+self.error_leader()+'\n\n'
'Null variable name.')
if var_paren_depth > 0:
raise InputError('Error: near '+self.error_leader()+'\n\n'
'Incomplete bracketed variable name.')
var_descr_str = ''.join(var_descr_plist)
# Now check for variable format modifiers,
# like python's ".rjust()" and ".ljust()".
# If present, then put these in the variable suffix.
if ((len(var_descr_plist)>0) and (var_descr_plist[-1]==')')):
#i = len(var_descr_plist)-1
#while i >= 0:
# if var_descr_plist[i] == '(':
# break
# i -= 1
i = var_descr_str.rfind('(')
if (((i-6) >= 0) and
((var_descr_str[i-6:i] == '.rjust') or
(var_descr_str[i-6:i] == '.ljust'))):
var_suffix =''.join(var_descr_plist[i-6:])+var_suffix
#var_descr_plist = var_descr_plist[:i-6]
var_descr_str = var_descr_str[:i-6]
# Process any special characters in the variable name
var_descr_str = EscCharStrToChar(var_descr_str)
#tmpl_list.append( [[var_prefix, var_descr_str, var_suffix],
# (self.infile, self.lineno)] )
if simplify_output:
tmpl_list.append(var_prefix + var_descr_str + var_suffix)
else:
tmpl_list.append( VarRef(var_prefix, var_descr_str, var_suffix,
OSrcLoc(self.infile, self.lineno)) )
#if report_progress:
#sys.stderr.write(' parsed variable '+var_prefix+var_descr_str+var_suffix+'\n')
#sys.stdout.write('ReadTemplate() appending: ')
#print(var_prefix + var_descr_str + var_suffix)
del var_descr_plist
del var_descr_str
prev_filename = self.infile
prev_lineno = self.lineno
var_prefix = ''
var_descr_plist = []
var_suffix = ''
# Special case: Variable delimiters like '$'
# terminate the reading of variables,
# but they also signify that a new
# variable is being read.
if nextchar in self.var_delim:
# Then we are processing a new variable name
prev_char_delim = True
reading_var = True
var_paren_depth = 0
var_prefix = nextchar
elif nextchar in self.var_close_paren:
del text_block_plist
text_block_plist = []
#gc.collect()
prev_var_delim = False
reading_var = False
else:
# Generally, we don't want to initialize the next text block
# with the empty string. Consider that whatever character
# caused us to stop reading the previous variable and append
# it to the block of text that comes after.
del text_block_plist
text_block_plist = [nextchar]
#gc.collect()
prev_var_delim = False
reading_var = False
# If we reached the end of the template (and the user requests it),
# then the terminal character can be included in the list
# of text_blocks to be returned to the caller.
if done_reading and keep_terminal_char:
#sys.stdout.write('ReadTemplate() appending: \''+nextchar+'\'\n')
# Here we create a new text block which contains only the
# terminal character (nextchar).
#tmpl_list.append( [nextchar,
# ((self.infile, self.lineno),
# (self.infile, self.lineno))] )
if simplify_output:
tmpl_list.append(nextchar)
else:
tmpl_list.append(TextBlock(nextchar,
OSrcLoc(self.infile, self.lineno)))
#, OSrcLoc(self.infile, self.lineno)))
if escaped_state:
escaped_state = False
else:
if nextchar in self.escape:
escaped_state = True
#print("*** TMPL_LIST0 = ***", tmpl_list)
return tmpl_list # <- return value stored here
def GetParenExpr(self, prepend_str='', left_paren='(', right_paren=')'):
""" GetParenExpr() is useful for reading in strings
with nested parenthesis and spaces.
This function can read in the entire string:
.trans(0, 10.0*sin(30), 10.0*cos(30))
(Because I was too lazy to write this correctly...)
Spaces are currently stripped out of the expression.
(...unless surrounded by quotes) The string above becomes:
".trans(0,10.0*sin(30),10.0*cos(30))"
Sometimes the caller wants to prepend some text to the beginning
of the expression (which may contain parenthesis). For this
reason, an optional first argument ("prepend_str") can be
provided. By default it is empty.
"""
orig_wordterm = self.wordterminators
self.wordterminators = self.wordterminators.replace(left_paren,'').replace(right_paren,'')
token = self.get_token()
if ((token == '') or
(token == self.eof)):
return prepend_str
expr_str = prepend_str + token
#if (expr_str.find(left_paren) == -1):
# raise InputError('Error near or before '+self.error_leader()+'\n'
# 'Expected an open-paren (\"'+prepend_str+left_paren+'\") before this point.\n')
# return expr_str
paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren)
while ((len(expr_str) == 0) or (paren_depth > 0)):
token = self.get_token()
if ((type(token) is not str) or
(token == '')):
raise InputError('Error near or before '+self.error_leader()+'\n'
'Invalid expression: \"'+expr_str+'\"')
expr_str += token
paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren)
if (paren_depth != 0):
raise InputError('Error near or before '+self.error_leader()+'\n'
'Invalid expression: \"'+expr_str+'\"')
self.wordterminators = orig_wordterm
return expr_str
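# Example of GetParenExpr() (matching the docstring above):
#
#   lex = TemplateLexer('.trans(0, 10.0*sin(30), 10.0*cos(30))')
#   lex.GetParenExpr()
#     expected: '.trans(0,10.0*sin(30),10.0*cos(30))'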
if __name__ == '__main__':
if len(sys.argv) == 1:
lexer = TtreeShlex()
else:
file = sys.argv[1]
lexer = TtreeShlex(open(file), file)
while 1:
tt = lexer.get_token()
if tt:
print("Token: " + repr(tt))
else:
break
