ttree_lex.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Jul 25, 10:42

ttree_lex.py
View Options

This document is not UTF8. It was detected as ISO-8859-1 (Latin 1) and converted to UTF8 for display.

	# -- coding: iso-8859-1 --

	# Author: Andrew Jewett (jewett.aij at g mail)
	# http://www.chem.ucsb.edu/~sheagroup
	# License: 3-clause BSD License (See LICENSE.TXT)
	# Copyright (c) 2012, Regents of the University of California
	# All rights reserved.

	"""A lexical analyzer class for simple shell-like syntaxes.
	This version has been modified slightly to work better with unicode.
	It was forked from the version of shlex that ships with python 3.2.2.
	A few minor features and functions have been added. """

	# Module and documentation by Eric S. Raymond, 21 Dec 1998
	# Input stacking and error message cleanup added by ESR, March 2000
	# push_source() and pop_source() made explicit by ESR, January 2001.
	# Posix compliance, split(), string arguments, and
	# iterator interface by Gustavo Niemeyer, April 2003.
	# ("wordterminators" (unicode support) hack by Andrew Jewett September 2011)

	import os.path
	import sys
	from collections import deque
	import re, fnmatch
	import string
	#import gc


	try:
	from cStringIO import StringIO
	except ImportError:
	try:
	from StringIO import StringIO
	except ImportError:
	from io import StringIO

	__all__ = ["TtreeShlex",
	"split",
	"LineLex",
	"SplitQuotedString",
	"EscCharStrToChar",
	"SafelyEncodeString",
	"RemoveOuterQuotes",
	"MaxLenStr",
	"HasWildCard",
	#"IsRegex",
	"InputError",
	"ErrorLeader",
	"SrcLoc",
	"OSrcLoc",
	"TextBlock",
	"VarRef",
	"VarNPtr",
	"VarBinding",
	"SplitTemplate",
	"SplitTemplateMulti",
	"TableFromTemplate",
	"ExtractCatName",
	#"_TableFromTemplate",
	#"_DeleteLineFromTemplate",
	"DeleteLinesWithBadVars",
	"TemplateLexer"]




	class TtreeShlex(object):
	""" A lexical analyzer class for simple shell-like syntaxes.
	TtreeShlex is a backwards-compatible version of python's standard shlex
	module. It has the additional member: "self.wordterminators", which
	overrides the "self.wordchars" member. This enables better handling of
	unicode characters by allowing a much larger variety of characters to
	appear in words or tokens parsed by TtreeShlex.

	"""

	custom_path = None

	def __init__(self,
	instream=None,
	infile=None,
	custom_include_path=None,
	posix=False):
	if isinstance(instream, str):
	instream = StringIO(instream)
	if instream is not None:
	self.instream = instream
	self.infile = infile
	else:
	self.instream = sys.stdin
	self.infile = None
	self.posix = posix
	if posix:
	self.eof = None
	else:
	self.eof = ''
	self.commenters = '#'
	self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
	'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
	if self.posix:
	self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
	'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')

	self.wordterminators = set([]) #WORDTERMINATORS
	self.prev_space_terminator = '' #WORDTERMINATORS
	self.whitespace = ' \t\r\f\n'
	self.whitespace_split = False
	self.quotes = '\'"'
	self.escape = '\\'
	self.escapedquotes = '"'
	self.state = ' '
	self.pushback = deque()
	self.lineno = 1
	self.debug = 0
	self.token = ''
	self.filestack = deque()
	# self.source_triggers
	# are tokens which allow the seamless insertion of other
	# files into the file being read.
	self.source_triggers=set(['source'])
	self.source_triggers_x=set([])
	#Note: self.source_triggers_x
	# This is a subset of self.source_triggers.
	# In this case file inclusion is exclusive.
	# In other words, if one of these tokens
	# is encountered, the file is only included
	# if it has not been included already.
	self.source_files_restricted = set([])
	self.include_path = []
	if TtreeShlex.custom_path:
	include_path_list = TtreeShlex.custom_path.split(':')
	self.include_path += [d for d in include_path_list if len(d)>0]
	if 'TTREE_PATH' in os.environ:
	include_path_list = os.environ['TTREE_PATH'].split(':')
	self.include_path += [d for d in include_path_list if len(d)>0]
	if self.debug:
	print('TtreeShlex: reading from %s, line %d' \
	% (self.instream, self.lineno))
	self.end_encountered = False


	@staticmethod #WORDTERMINATORS
	def _belongs_to(char, include_chars, exclude_chars): #WORDTERMINATORS
	if ((not exclude_chars) or (len(exclude_chars)==0)): #WORDTERMINATORS
	return char in include_chars #WORDTERMINATORS
	else: #WORDTERMINATORS
	return char not in exclude_chars #WORDTERMINATORS

	def push_raw_text(self, text):
	"""Push a block of text onto the stack popped by the ReadLine() method.
	(If multiple lines are present in the text, (which is determined by
	self.line_terminators), then the text is split into multiple lines
	and each one of them is pushed onto this stack individually.
	The "self.lineno" counter is also adjusted, depending on the number
	of newline characters in "line".
	Do not strip off the newline, or other line terminators
	at the end of the text block before using push_raw_text()!

	"""
	if self.debug >= 1:
	print("TtreeShlex: pushing token " + repr(text))
	for c in reversed(text): #WORDTERMINATORS
	self.pushback.appendleft(c) #WORDTERMINATORS
	if c == '\n': #WORDTERMINATORS
	self.lineno -= 1 #WORDTERMINATORS
	if len(text) > 0: #WORDTERMINATORS
	self.end_encountered = False #WORDTERMINATORS

	def push_token(self, text):
	"Push a token onto the stack popped by the get_token method"
	self.push_raw_text(text+self.prev_space_terminator)

	def push_source(self, newstream, newfile=None):
	"Push an input source onto the lexer's input source stack."
	if isinstance(newstream, str):
	newstream = StringIO(newstream)
	self.filestack.appendleft((self.infile, self.instream, self.lineno))
	self.infile = newfile
	self.instream = newstream
	self.lineno = 1
	if self.debug:
	if newfile is not None:
	print('TtreeShlex: pushing to file %s' % (self.infile,))
	else:
	print('TtreeShlex: pushing to stream %s' % (self.instream,))

	def pop_source(self):
	"Pop the input source stack."
	self.instream.close()
	(self.infile, self.instream, self.lineno) = self.filestack.popleft()
	if self.debug:
	print('TtreeShlex: popping to %s, line %d' \
	% (self.instream, self.lineno))
	self.state = ' '

	def get_token(self):
	"Get a token from the input stream (or from stack if it's nonempty)"
	#### #CHANGING: self.pushback is now a stack of characters, not tokens #WORDTERMINATORS
	#### if self.pushback: #WORDTERMINATORS
	#### tok = self.pushback.popleft() #WORDTERMINATORS
	#### if self.debug >= 1: #WORDTERMINATORS
	#### print("TtreeShlex: popping token " + repr(tok)) #WORDTERMINATORS
	#### return tok #WORDTERMINATORS
	#### No pushback. Get a token. #WORDTERMINATORS
	raw = self.read_token()
	# Handle inclusions
	if self.source_triggers is not None:
	while raw in self.source_triggers:
	fname=self.read_token()
	spec = self.sourcehook(fname)
	if spec:
	(newfile, newstream) = spec
	if ((raw not in self.source_triggers_x) or
	(newfile not in self.source_files_restricted)):
	self.push_source(newstream, newfile)
	if raw in self.source_triggers_x:
	self.source_files_restricted.add(newfile)
	else:
	if self.debug >= 0:
	sys.stderr.write('\ndebug warning: duplicate attempt to import file:\n \"'+newfile+'\"\n')
	raw = self.get_token()

	# Maybe we got EOF instead?
	while raw == self.eof:
	if not self.filestack:
	return self.eof
	else:
	self.pop_source()
	raw = self.get_token()
	# Neither inclusion nor EOF
	if self.debug >= 1:
	if raw != self.eof:
	print("TtreeShlex: token=" + repr(raw))
	else:
	print("TtreeShlex: token=EOF")

	if raw == self.eof: #WORDTERMINATORS
	self.end_encountered = True #WORDTERMINATORS

	return raw


	def read_char(self):
	if self.pushback: #WORDTERMINATORS
	nextchar = self.pushback.popleft() #WORDTERMINATORS
	assert((type(nextchar) is str) and (len(nextchar)==1)) #WORDTERMINATORS
	else: #WORDTERMINATORS
	nextchar = self.instream.read(1) #WORDTERMINATORS
	return nextchar


	def read_token(self):
	self.prev_space_terminator = '' #WORDTERMINATORS
	quoted = False
	escapedstate = ' '
	while True:
	#### self.pushback is now a stack of characters, not tokens #WORDTERMINATORS
	nextchar = self.read_char()
	if nextchar == '\n':
	self.lineno = self.lineno + 1
	if self.debug >= 3:
	print("TtreeShlex: in state", repr(self.state), \
	"I see character:", repr(nextchar))
	if self.state is None:
	self.token = '' # past end of file
	break
	elif self.state == ' ':
	if not nextchar:
	self.state = None # end of file
	break
	elif nextchar in self.whitespace:
	if self.debug >= 2:
	print("TtreeShlex: I see whitespace in whitespace state")
	if self.token or (self.posix and quoted):
	# Keep track of which whitespace
	# character terminated the token.
	self.prev_space_terminator = nextchar #WORDTERMINATORS
	break # emit current token
	else:
	continue
	elif nextchar in self.commenters:
	self.instream.readline()
	self.lineno = self.lineno + 1
	elif self.posix and nextchar in self.escape:
	escapedstate = 'a'
	self.state = nextchar
	elif TtreeShlex._belongs_to(nextchar, #WORDTERMINATORS
	self.wordchars, #WORDTERMINATORS
	self.wordterminators):#WORDTERMINATORS
	self.token = nextchar
	self.state = 'a'
	elif nextchar in self.quotes:
	if not self.posix:
	self.token = nextchar
	self.state = nextchar
	elif self.whitespace_split:
	self.token = nextchar
	self.state = 'a'
	else:
	self.token = nextchar
	if self.token or (self.posix and quoted):
	break # emit current token
	else:
	continue
	elif self.state in self.quotes:
	quoted = True
	if not nextchar: # end of file
	if self.debug >= 2:
	print("TtreeShlex: I see EOF in quotes state")
	# XXX what error should be raised here?
	raise ValueError("Error at or before "+self.error_leader()+"\n"
	" No closing quotation.")
	if nextchar == self.state:
	if not self.posix:
	self.token = self.token + nextchar
	self.state = ' '
	break
	else:
	self.state = 'a'
	elif self.posix and nextchar in self.escape and \
	self.state in self.escapedquotes:
	escapedstate = self.state
	self.state = nextchar
	else:
	self.token = self.token + nextchar
	elif self.state in self.escape:
	if not nextchar: # end of file
	if self.debug >= 2:
	print("TtreeShlex: I see EOF in escape state")
	# XXX what error should be raised here?
	raise ValueError("No escaped character")
	# In posix shells, only the quote itself or the escape
	# character may be escaped within quotes.
	if escapedstate in self.quotes and \
	nextchar != self.state and nextchar != escapedstate:
	self.token = self.token + self.state
	self.token = self.token + nextchar
	self.state = escapedstate
	elif self.state == 'a':
	if not nextchar:
	self.state = None # end of file
	break
	elif nextchar in self.whitespace:
	if self.debug >= 2:
	print("TtreeShlex: I see whitespace in word state")
	self.state = ' '
	if self.token or (self.posix and quoted):
	# Keep track of which whitespace
	# character terminated the token.
	self.prev_space_terminator = nextchar #WORDTERMINATORS
	break # emit current token
	else:
	continue
	elif nextchar in self.commenters:
	comment_contents = self.instream.readline()
	self.lineno = self.lineno + 1
	if self.posix:
	self.state = ' '
	if self.token or (self.posix and quoted):
	# Keep track of which character(s) terminated
	# the token (including whitespace and comments).
	self.prev_space_terminator = nextchar + comment_contents #WORDTERMINATORS
	break # emit current token
	else:
	continue
	elif self.posix and nextchar in self.quotes:
	self.state = nextchar
	elif self.posix and nextchar in self.escape:
	escapedstate = 'a'
	self.state = nextchar
	elif (TtreeShlex._belongs_to(nextchar, #WORDTERMINATORS
	self.wordchars, #WORDTERMINATORS
	self.wordterminators)#WORDTERMINATORS
	or (nextchar in self.quotes) #WORDTERMINATORS
	or (self.whitespace_split)): #WORDTERMINATORS
	self.token = self.token + nextchar
	else:
	self.pushback.appendleft(nextchar)
	if self.debug >= 2:
	print("TtreeShlex: I see punctuation in word state")
	self.state = ' '
	if self.token:
	break # emit current token
	else:
	continue
	result = self.token
	self.token = ''
	if self.posix and not quoted and result == '':
	result = None
	if self.debug > 1:
	if result:
	print("TtreeShlex: raw token=" + repr(result))
	else:
	print("TtreeShlex: raw token=EOF")
	return result

	def sourcehook(self, newfile):
	"Hook called on a filename to be sourced."
	newfile = RemoveOuterQuotes(newfile)
	# This implements cpp-like semantics for relative-path inclusion.
	if isinstance(self.infile, str) and not os.path.isabs(newfile):
	newfile_full = os.path.join(os.path.dirname(self.infile), newfile)
	try:
	f = open(newfile_full, "r")
	except IOError:
	# If not found,
	err = True
	# ...then check to see if the file is in one of the
	# directories in the self.include_path list.
	for d in self.include_path:
	newfile_full = os.path.join(d, newfile)
	try:
	f = open(newfile_full, "r")
	err = False
	break
	except IOError:
	err=True
	if err:
	raise InputError('Error at '+self.error_leader()+'\n'
	' unable to open file \"'+newfile+'\"\n'
	' for reading.\n')
	return (newfile, f)

	def error_leader(self, infile=None, lineno=None):
	"Emit a C-compiler-like, Emacs-friendly error-message leader."
	if infile is None:
	infile = self.infile
	if lineno is None:
	lineno = self.lineno
	return "\"%s\", line %d: " % (infile, lineno)

	def __iter__(self):
	return self

	def __next__(self):
	token = self.get_token()
	if token == self.eof:
	raise StopIteration
	return token

	def __bool__(self):
	return not self.end_encountered

	# For compatibility with python 2.x, I must also define:
	def __nonzero__(self):
	return self.__bool__()


	# The split() function was originally from shlex
	# It is included for backwards compatibility.
	def split(s, comments=False, posix=True):
	lex = TtreeShlex(s, posix=posix)
	lex.whitespace_split = True
	if not comments:
	lex.commenters = ''
	return list(lex)



	##################### NEW ADDITIONS (may be removed later) #################

	#"""
	# -- linelex.py --
	#linelex.py defines the LineLex class, which inherits from, and further
	#augments the capabilities of TtreeShlex by making it easier to parse
	#individual lines one at a time. (The original shlex's "source" inclusion
	#ability still works when reading entire lines, and lines are still counted.)
	#
	#"""

	#import sys


	class InputError(Exception):
	""" A generic exception object containing a string for error reporting.
	(Raising this exception implies that the caller has provided
	a faulty input file or argument.)

	"""

	def __init__(self, err_msg):
	self.err_msg = err_msg
	def __str__(self):
	return self.err_msg
	def __repr__(self):
	return str(self)


	def ErrorLeader(infile, lineno):
	return '\"'+infile+'\", line '+str(lineno)


	class SrcLoc(object):
	""" SrcLoc is essentially nothing more than a 2-tuple containing the name
	of a file (str) and a particular line number inside that file (an integer).

	"""
	__slots__=["infile","lineno"]
	def __init__(self, infile='', lineno=-1):
	self.infile = infile
	self.lineno = lineno




	def SplitQuotedString(string,
	quotes='\'\"',
	delimiters=' \t\r\f\n',
	escape='\\',
	comment_char='#'):
	tokens = []
	token = ''
	reading_token = True
	escaped_state = False
	quote_state = None
	for c in string:

	if (c in comment_char) and (not escaped_state) and (quote_state==None):
	tokens.append(token)
	return tokens

	elif (c in delimiters) and (not escaped_state) and (quote_state==None):
	if reading_token:
	tokens.append(token)
	token = ''
	reading_token = False

	elif c in escape:
	if escaped_state:
	token += c
	reading_token = True
	escaped_state = False
	else:
	escaped_state = True
	# and leave c (the '\' character) out of token
	elif (c in quotes) and (not escaped_state):
	if (quote_state != None):
	if (c == quote_state):
	quote_state = None
	else:
	quote_state = c
	token += c
	reading_token = True
	else:
	if (c == 'n') and (escaped_state == True):
	c = '\n'
	elif (c == 't') and (escaped_state == True):
	c = '\t'
	elif (c == 'r') and (escaped_state == True):
	c = '\r'
	elif (c == 'f') and (escaped_state == True):
	c = '\f'
	token += c
	reading_token = True
	escaped_state = False

	if len(string) > 0:
	tokens.append(token)
	return tokens


	def EscCharStrToChar(s_in, escape='\\'):
	"""
	EscCharStrToChar() replaces any escape sequences
	in a string with their 1-character equivalents.

	"""
	assert(len(escape) > 0)
	out_lstr = []
	escaped_state = False
	for c in s_in:
	if escaped_state:
	if (c == 'n'):
	out_lstr.append('\n')
	elif (c == 't'):
	out_lstr.append('\t')
	elif (c == 'r'):
	out_lstr.append('\r')
	elif (c == 'f'):
	out_lstr.append('\f')
	elif (c == '\''):
	out_lstr.append('\'')
	elif (c == '\"'):
	out_lstr.append('\"')
	elif c in escape:
	out_lstr.append(c)
	else:
	out_lstr.append(escape+c) # <- keep both characters
	escaped_state = False
	else:
	if c in escape:
	escaped_state = True
	else:
	out_lstr.append(c)

	return ''.join(out_lstr)



	def SafelyEncodeString(in_str,
	quotes='\'\"',
	delimiters=' \t\r\f\n',
	escape='\\',
	comment_char='#'):
	"""
	SafelyEncodeString(in_str) scans through the input string (in_str),
	and returns a new string in which probletic characters
	(like newlines, tabs, quotes, etc), are replaced by their two-character
	backslashed equivalents (like '\n', '\t', '\'', '\"', etc).
	The escape character is the backslash by default, but it too can be
	overridden to create custom escape sequences
	(but this does not effect the encoding for characters like '\n', '\t').

	"""
	assert(len(escape) > 0)
	out_lstr = []
	use_outer_quotes = False
	for c in in_str:
	if (c == '\n'):
	c = '\\n'
	elif (c == '\t'):
	c = '\\t'
	elif (c == '\r'):
	c = '\\r'
	elif (c == '\f'):
	c = '\\f'
	elif c in quotes:
	c = escape[0]+c
	elif c in escape:
	c = c+c
	elif c in delimiters:
	use_outer_quotes = True
	# hmm... that's all that comes to mind. Did I leave anything out?
	out_lstr.append(c)

	if use_outer_quotes:
	out_lstr = ['\"'] + out_lstr + ['\"']

	return ''.join(out_lstr)


	def RemoveOuterQuotes(text, quotes='\"\''):
	if ((len(text)>=2) and (text[0] in quotes) and (text[-1]==text[0])):
	return text[1:-1]
	else:
	return text



	def MaxLenStr(s1, s2):
	if len(s2) > len(s1):
	return s2
	else:
	return s1


	#def IsRegex(pat):
	# """
	# Check to see if string (pat) is bracketed by slashes.
	#
	# """
	# return (len(pat)>=2) and (pat[0]=='/') and (pat[-1] == '/')

	def HasWildCard(pat):
	"""
	Returns true if a string (pat) contains a '*' or '?' character.

	"""
	return (pat.find('*') != -1) or (pat.find('?') != -1)


	#def HasWildCard(pat):
	# """
	# Returns true if a string (pat) contains a non-backslash-protected
	# * or ? character.
	#
	# """
	# N=len(pat)
	# i=0
	# while i < N:
	# i = pat.find('*', i, N)
	# if i == -1:
	# break
	# elif (i==0) or (pat[i-1] != '\\'):
	# return True
	# i += 1
	# i=0
	# while i < N:
	# i = pat.find('?', i, N)
	# if i == -1:
	# break
	# elif (i==0) or (pat[i-1] != '\\'):
	# return True
	# i += 1
	# return False


	def MatchesPattern(s, pattern):
	if type(pattern) is str:
	#old code:
	#if ((len(s) > 1) and (s[0] == '/') and (s[-1] == '/'):
	# re_string = p[1:-1] # strip off the slashes '/' and '/'
	# if not re.search(re_string, s):
	# return False
	#new code:
	# uses precompiled regular expressions (See "pattern.search" below)
	if HasWildCard(pattern):
	if not fnmatch.fnmatchcase(s, pattern):
	return False
	elif s != pattern:
	return False
	else:
	#assert(type(p) is _sre.SRE_Match)
	# I assume pattern = re.compile(some_reg_expr)
	if not pattern.search(s):
	return False
	return True



	def MatchesAll(multi_string, pattern):
	assert(len(multi_string) == len(pattern))
	for i in range(0, len(pattern)):
	if not MatchesPattern(multi_string[i], pattern[i]):
	return False
	return True



	class LineLex(TtreeShlex):
	""" This class extends the TtreeShlex module (a slightly modified
	version of the python 3.2.2 version of shlex). LineLex has the
	ability to read one line at a time (in addition to one token at a time).
	(Many files and scripts must be parsed one line at a time instead of one
	token at a time. In these cases, the whitespace position also matters.)

	Arguably, this class might not be necessary.
	I could get rid of this class completely. That would be nice. To do that
	we would need to augment and generalize shlex's get_token() member function
	to make it read lines, not just tokens. Of course, you can always
	change the wordchars (or wordterminators). Even so, there are two other
	difficulties using the current version of shlex.get_token() to read lines:
	1) File inclusion happen whenever the beginning of a line/token matches one
	of the "source_triggers" (not the whole line as required by get_token()).
	2) Lines ending in a special character (by default the backslash character)
	continue on to the next line.
	This code seems to work on our test files, but I'm sure there are bugs.
	Andrew 2012-3-25

	"""
	def __init__(self,
	instream=None,
	infile=None,
	posix=False):
	TtreeShlex.__init__(self, instream, infile, posix)
	self.line_terminators = '\n'
	self.line_extend_chars = '\\'
	self.skip_comments_during_readline = True


	def _StripComments(self, line):
	if self.skip_comments_during_readline:
	for i in range(0, len(line)):
	if ((line[i] in self.commenters) and
	((i==0) or (line[i-1] not in self.escape))):
	return line[:i]
	return line



	def _ReadLine(self,
	recur_level=0):
	"""
	This function retrieves a block of text, halting at a
	terminal character. Escape sequences are respected.
	The self.lineno (newline counter) is also maintained.

	The main difference between Readline and get_token()
	is the way they handle the "self.source_triggers" member.
	Both Readline() and get_token() insert text from other files when they
	encounter a string in "self.source_triggers" in the text they read.
	However ReadLine() ONLY inserts text from other files if the token which
	matches with self.source_triggers appears at the beginning of the line.
	get_token() inserts text only if lex.source matches the entire token.

	comment-to-self:
	At some point, once I'm sure this code is working, I should replace
	shlex.get_token() with the code from ReadLine() which is more general.
	It would be nice to get rid of "class LineLex" entirely. ReadLine()
	is the only new feature that LineLex which was lacking in shlex.

	To do this I would need to add a couple optional arguments to
	"get_token()", allowing it to mimic ReadLine(), such as:
	"override_wordterms" argument (which we can pass a '\n'), and
	"token_extender" argument (like '\' for extending lines)

	"""
	first_token=''
	line = ''
	escaped_state = False
	found_space = False
	while True:
	nextchar = self.read_char()
	#sys.stderr.write('nextchar=\"'+nextchar+'\"\n')
	while nextchar == '':
	if not self.filestack:
	return self._StripComments(line), '', first_token, found_space
	else:
	self.pop_source()
	nextchar = self.read_char()
	if nextchar == '\n':
	self.lineno += 1

	if escaped_state:
	escaped_state = False
	else:
	if nextchar in self.escape:
	line += nextchar
	escaped_state = True
	else:
	escaped_state = False

	if not escaped_state:
	if (nextchar in self.whitespace):
	found_space = True
	while first_token in self.source_triggers:
	fname = RemoveOuterQuotes(self.get_token())
	if (fname == '') or (fname in self.source_triggers):
	raise InputError('Error: near '+self.error_leader()+'\n'
	' Nonsensical file inclusion request.\n')
	if self.debug >= 0:
	sys.stderr.write( (' ' * recur_level) +
	'reading file \"'+fname+'\"\n')
	spec = self.sourcehook(fname)
	if spec:
	(fname, subfile) = spec
	if ((first_token not in self.source_triggers_x) or
	(fname not in self.source_files_restricted)):
	self.push_source(subfile, fname)
	if first_token in self.source_triggers_x:
	self.source_files_restricted.add(fname)
	else:
	if self.debug >= 0:
	sys.stderr.write('\nWarning at '+self.error_leader()+':\n'
	' duplicate attempt to import file:\n \"'+fname+'\"\n')

	line, nextchar, first_token, found_space = \
	self._ReadLine(recur_level+1)


	if nextchar in self.line_terminators:
	line_nrw = line.rstrip(self.whitespace)
	#sys.stderr.write('line_nrw=\"'+line_nrw+'\"\n')
	if ((len(line_nrw) > 0) and
	(line_nrw[-1] in self.line_extend_chars) and
	((len(line_nrw) < 2) or (line_nrw[-2] not in self.escape))):
	line = line_nrw[:-1] #delete the line_extend character
	# from the end of that line and keep reading...
	else:
	return self._StripComments(line), nextchar, first_token, found_space
	else:
	line += nextchar
	if not found_space:
	first_token += nextchar



	def ReadLine(self, recur_level=0):
	line, nextchar, first_token, found_space = \
	self._ReadLine(recur_level)
	if nextchar == self.eof:
	self.end_encountered = True
	return line + nextchar


	@staticmethod
	def TextBlock2Lines(text, delimiters, keep_delim=True):
	""" This splits a string into a list of sub-strings split by delimiter
	characters. This function is different from the standard str.split()
	function: The string is split at every character which belongs to the
	"delimiters" argument (which can be a string or some other container).
	This character is included at the end of every substring. Example:
	TextBlock2Lines('\nabc\nde^fg\nhi j\n', '^\n')
	returns:
	['\n', 'abc\n', 'de^', 'fg\n', 'hi j\n']

	"""
	ls = []
	i = 0
	i_prev = 0
	while i < len(text):
	if text[i] in delimiters:
	if keep_delim:
	ls.append(text[i_prev:i+1])
	else:
	ls.append(text[i_prev:i])
	i_prev = i+1
	i += 1
	if (i_prev < len(text)):
	ls.append(text[i_prev:i+1])
	return ls

	def __iter__(self):
	return self

	def __next__(self):
	line = self.ReadLine()
	if line == self.eof:
	raise StopIteration
	return line


	class OSrcLoc(object):
	""" OSrcLoc is barely more than a 2-tuple containing the name of a file
	(a string) and a particular line number inside that file (an integer).
	These objects are passed around and stored in the nodes of
	every tree, so that if a syntax error or broken link in that node
	is discovered, an error message can be provided to the user.

	"order"
	Later on, during development, the "order" member was added. Why:
	If you want to know whether block of text comes before or after a
	different block of text, unfortunately you can not just compare the
	corresponding line numbers of the files they come from because the
	files may differ, and multiple short blocks of text may occupy the
	same line. Consequently, "OSrcLoc" also maintains an internal
	counter which keeps track of how many OSrcLoc() objects have been
	created so far. (This can be useful if the user requests that
	variables and commands be assigned in a non-standard order.)
	The "order" member is assigned to this counter.
	Most of the time, the "order" member can be ignored.

	"""

	__slots__=["infile","lineno","order"]

	count = 0

	def __init__(self, infile='', lineno=-1):
	self.infile = infile
	self.lineno = lineno
	OSrcLoc.count += 1
	self.order = OSrcLoc.count

	def __lt__(self, x):
	return self.order < x.order

	#def __repr__(self):
	# return repr((self.infile, self.lineno, self.order))



	class TextBlock(object):
	"""TextBlock is just a 3-tuple consisting of a string, and an OSrcLoc
	to help locate it in the original file from which it was read."""

	__slots__=["text","srcloc"]

	def __init__(self, text, srcloc): #srcloc_end):
	self.text = text
	if srcloc == None:
	self.srcloc = OSrcLoc()
	else:
	self.srcloc = srcloc
	#if srcloc_end == None:
	# self.srcloc_end = OSrcLoc()
	#else:
	# self.srcloc_end = srcloc_end

	def __repr__(self):
	return '\"'+self.text+'\"'



	class VarRef(object):
	"""VarRef stores variable names, and paths, and other attribute information,
	as well as a "OSrcLoc" to keep track of the file it was defined in."""

	__slots__=["prefix","descr_str","suffix","srcloc","binding","nptr"]

	def __init__(self,
	prefix = '', # '$' or '${'
	descr_str = '', # <- descriptor string: "cpath/category:lpath"
	suffix = '', # '}'
	srcloc = None,# location in file where defined
	binding = None,# a pointer to a tuple storing the value
	nptr = None):# <- see class VarNPtr

	self.prefix = prefix #Any text before the descriptor string goes here
	self.suffix = suffix #Any text after the descriptor string goes here
	self.descr_str = descr_str
	if srcloc == None: # <- Location in text file where variable appears
	self.srcloc = OSrcLoc()
	else:
	self.srcloc = srcloc

	self.binding = binding

	if nptr == None:
	self.nptr = VarNPtr()
	else:
	self.nptr = nptr

	def __lt__(self, x):
	return self.order < x.order

	#def __repr__(self):
	# return repr((self.prefix + self.descr_str + self.suffix, srcloc))


	class VarNPtr(object):
	"""
	Every time a variable appears in a template, it has has a "descritpor".
	For example, consider the variable
	"$atom:CA"
	This is a string which encodes 3 pieces of information.
	1) the category name: This is essentialy indicates the variable's type.
	(ie "atom", in the example above)
	2) the category node: Some TYPES have limited scope. Users can
	specify the root node of the portion of the tree
	in which this variable's type makes sense.
	If this node is the root node, then that category
	is relevant everywhere, and is not molecule or class
	specific. All variables have a category node, which
	is often not explicitly defined to by the user.
	It must be inferred/determined.)
	(Category node = the root "/", in the example above.)
	3) the leaf node: This is a node whose ".name" member matches the name
	of a variable. This node is created for this purpose
	and it's position in the tree is a reflection of
	that variable's intended scope.
	In a molecule this "name" might be the name
	of a type of atom, or an atom ID, or a bond type,
	which is found in a particular molecule.
	(Leaf node would be named "CA" in the example above.)

	The VarNPtr class is simply a 3-tuple which
	keeps these 3 pieces of data together.

	"""

	__slots__=["cat_name","cat_node","leaf_node"]

	def __init__(self, cat_name='', cat_node=None, leaf_node=None):
	self.cat_name = cat_name
	self.cat_node = cat_node
	self.leaf_node = leaf_node

	#def __repr__(self):
	# return repr((self.cat_name, self.cat_node.name, self.leaf_node.name))


	class VarBinding(object):
	""" VarBinding is essentially a tuple consistng of (full_name, binding, refs):

	"self.full_name" is canonical name for this variable. This is a string
	which specifies full path leading to the category node (beginning with '/'),
	the category name (followed by a ':'),
	as well as the leaf node (including the path leading up to it from cat_node)
	This triplet identifies the variable uniquely.

	"self.value" is the data that the variable refers to (usually a string).

	"self.refs" stores a list of VarRefs which mention the same variable
	from the various places inside various templates in the tree.

	"""

	__slots__=["full_name","nptr","value","refs","order","category"]

	def __init__(self,
	full_name = '',
	nptr = None,
	value = None,
	refs = None,
	order = None,
	category = None):
	self.full_name = full_name
	self.nptr = nptr
	self.value = value
	self.refs = refs
	self.order = order
	self.category = category

	def __lt__(self, x):
	return self.order < x.order

	def __repr__(self):
	return repr((self.full_name, self.value, self.order))


	def ExtractCatName(descr_str):
	""" When applied to a VarRef's "descr_str" member,
	this function will extract the "catname" of it's corresponding
	"nptr" member. This can be useful for error reporting.
	(I use it to insure that the user is using the correct counter
	variable types at various locations in their input files.)

	"""

	ib = descr_str.find(':')
	if ib == -1:
	ib = len(descr_str)
	ia = descr_str.rfind('/')
	if ia == -1:
	ia = 0
	return descr_str[ia:ib]
	else:
	str_before_colon = descr_str[0:ib]
	ia = str_before_colon.rfind('/')
	if ia == -1:
	return str_before_colon
	else:
	return str_before_colon[ia+1:]



	def _DeleteLineFromTemplate(tmpl_list,
	i_entry, # index into tmpl_list
	newline_delimiter='\n'):
	""" Delete a single line from tmpl_list.
	tmpl_list is an alternating list of VarRefs and TextBlocks.
	To identify the line, the index corresponding to one of the
	entries in the tmpl_list is used. (Usually it is a VarRef)
	The text after the preceeding newline, and the text up to the next newline
	(starting from the beginning of the current entry, if a TextBlock)
	is deleted, including any VarRef (variables) located in between.

	It returns the index corresponding to the next
	entry in the list (after deletion).

	"""

	i_prev_newline = i_entry
	while i_prev_newline >= 0:
	entry = tmpl_list[i_prev_newline]
	if isinstance(entry, TextBlock):
	i_char_newline = entry.text.rfind(newline_delimiter)
	if i_char_newline != -1: # then newline found
	# Delete the text after this newline
	entry.text = entry.text[:i_char_newline+1]
	break
	i_prev_newline -= 1

	first_var = True
	#i_next_newline = i_entry
	i_next_newline = i_prev_newline+1
	while i_next_newline < len(tmpl_list):
	entry = tmpl_list[i_next_newline]
	if isinstance(entry, TextBlock):
	i_char_newline = entry.text.find(newline_delimiter)
	if i_char_newline != -1: # then newline found
	# Delete the text before this newline (including the newline)
	entry.text = entry.text[i_char_newline+1:]
	break
	# Invoke DeleteSelf() on the first variables on this line. This will
	# insure that it is deleted from the ttree_assignments.txt file.
	elif isinstance(entry, VarRef):
	if first_var:
	entry.nptr.leaf_node.DeleteSelf()
	first_var = False
	i_next_newline += 1

	del tmpl_list[i_prev_newline + 1 : i_next_newline]
	return i_prev_newline + 1



	def DeleteLinesWithBadVars(tmpl_list,
	delete_entire_template = False,
	newline_delimiter = '\n'):
	"""
	Loop through the entries in a template,
	an alternating list of TextBlocks and VarRefs (tmpl_list).
	If a VarRef points to a leaf_node which no longer exists
	(ie. no longer in the corresponding category's .bindings list).
	Then delete the line it came from from the template (tmpl_list).

	"""

	out_str_list = []
	i = 0
	while i < len(tmpl_list):
	entry = tmpl_list[i]
	if isinstance(entry, VarRef):
	var_ref = entry
	var_bindings = var_ref.nptr.cat_node.categories[var_ref.nptr.cat_name].bindings
	#if var_ref.nptr.leaf_node not in var_bindings:
	if var_ref.nptr.leaf_node.IsDeleted():
	if delete_entire_template:
	del tmpl_list[:]
	return 0
	else:
	i = _DeleteLineFromTemplate(tmpl_list,
	i,
	newline_delimiter)
	else:
	i += 1
	else:
	i += 1







	def SplitTemplate(ltmpl, delim, delete_blanks = False):
	"""
	Split a template "ltmpl" into a list of "tokens" (sub-templates)
	using a single delimiter string "delim".

	INPUT arguments:
	"ltmpl" should be an list of TextBlocks and VarRefs.
	"delim" should be a simple string (type str)
	"delete_blanks" should be a boolean True/False value.
	When true, successive occurrences of the delimiter
	should not create blank entries in the output list.

	OUTPUT:
	A list of tokens.
	Each "token" is either a TextBlock, a VarRef,
	or a (flat, 1-dimensional) list containing more than one of these objects.
	The number of "tokens" returned equals the number of times the delimiter
	is encountered in any of the TextBlocks in the "ltmpl" argument, plus one.
	(... Unless "delete_blanks" is set to True.
	Again, in that case, empty entries in this list are deleted.)

	"""
	assert(type(delim) is str)
	if not hasattr(ltmpl, '__len__'):
	ltmpl = [ltmpl]

	tokens_lltmpl = []
	token_ltmpl = []
	i = 0
	while i < len(ltmpl):
	entry = ltmpl[i]
	if isinstance(entry, TextBlock):
	#if hasattr(entry, 'text'):
	prev_src_loc = entry.srcloc

	tokens_str = entry.text.split(delim)

	lineno = entry.srcloc.lineno

	j = 0
	while j < len(tokens_str):
	token_str = tokens_str[j]

	delim_found = False
	if (j < len(tokens_str)-1):
	delim_found = True

	if token_str == '':
	if delete_blanks:
	if delim == '\n':
	lineno += 1
	if len(token_ltmpl) > 0:
	if len(token_ltmpl) == 1:
	tokens_lltmpl.append(token_ltmpl[0])
	else:
	tokens_lltmpl.append(token_ltmpl)
	del token_ltmpl
	token_ltmpl = []
	j += 1
	continue

	new_src_loc = OSrcLoc(prev_src_loc.infile, lineno)
	new_src_loc.order = prev_src_loc.order

	for c in token_str:
	# Reminder to self: c != delim (so c!='\n' if delim='\n')
	# (We keep track of '\n' characters in delimiters above.)
	if c == '\n':
	lineno +=1

	new_src_loc.lineno = lineno

	text_block = TextBlock(token_str,
	new_src_loc)

	prev_src_loc = new_src_loc

	if len(token_ltmpl) == 0:
	if delim_found:
	tokens_lltmpl.append(text_block)
	del token_ltmpl
	token_ltmpl = []
	else:
	token_ltmpl.append(text_block)
	else:
	if delim_found:
	if len(token_str) > 0:
	token_ltmpl.append(text_block)
	tokens_lltmpl.append(token_ltmpl)
	del token_ltmpl
	token_ltmpl = []
	else:
	assert(not delete_blanks)
	if (isinstance(token_ltmpl[-1], VarRef)
	and
	((j>0)
	or
	((j == len(tokens_str)-1) and
	(i == len(ltmpl)-1))
	)):
	# In that case, this empty token_str corresponds
	# to a delimiter which was located immediately
	# after the variable name,
	# AND
	# -there is more text to follow,
	# OR
	# -we are at the end of the template.
	token_ltmpl.append(text_block)
	if len(token_ltmpl) == 1:
	tokens_lltmpl.append(token_ltmpl[0])
	else:
	tokens_lltmpl.append(token_ltmpl)
	del token_ltmpl
	token_ltmpl = []
	else:
	token_ltmpl.append(text_block)

	if (delim_found and (delim == '\n')):
	lineno += 1

	j += 1

	elif isinstance(entry, VarRef):
	#elif hasattr(entry, 'descr_str'):
	lineno = entry.srcloc.lineno
	if ((len(token_ltmpl) == 1) and
	isinstance(token_ltmpl[0], TextBlock) and
	(len(token_ltmpl[0].text) == 0)):
	# special case: if the previous entry was "", then it means
	# the delimeter appeared at the end of the previous text block
	# leading up to this variable. It separates the variable from
	# the previous text block. It is not a text block of length 0.
	token_ltmpl[0] = entry
	else:
	token_ltmpl.append(entry)
	elif entry == None:
	token_ltmpl.append(entry)
	else:
	assert(False)

	i += 1

	# Append left over remains of the last token
	if len(token_ltmpl) == 1:
	tokens_lltmpl.append(token_ltmpl[0])
	elif len(token_ltmpl) > 1:
	tokens_lltmpl.append(token_ltmpl)
	del token_ltmpl

	return tokens_lltmpl






	def SplitTemplateMulti(ltmpl, delims, delete_blanks=False):
	"""
	Split a template "ltmpl" into a list of templates using a
	single one or more delimiter strings "delim_list".
	If multiple delimiter strings are provided, splitting
	begins using the first delimiter string in the list.
	Then each token in the resulting list of templates
	is split using the next delimiter string
	and so on until we run out of delimiter strings.

	"ltmpl" should be an list of TextBlocks and VarRefs.
	"delims" should be a simple string (type str) or a list of strings
	"delete_blanks" is either True or False
	If True, then any blank entries in the resulting list of
	tokens (sub-templates) will be deleted.

	"""

	if hasattr(delims, '__len__'): # then it hopefully is a list of strings
	delim_list = delims
	else:
	delim_list = [delims] # then it hopefully is a string

	tokens = [ltmpl]
	for delim in delim_list:
	assert(type(delim) is str)
	tokens_il = []
	for t in tokens:
	sub_tokens = SplitTemplate(t, delim, delete_blanks)
	for st in sub_tokens:
	if hasattr(st, '__len__'):
	if (len(st) > 0) or (not delete_blanks):
	tokens_il.append(st)
	else:
	tokens_il.append(st)
	tokens = tokens_il
	del tokens_il

	return tokens



	def _TableFromTemplate(d, ltmpl, delimiters, delete_blanks):
	"""
	See the docstring for the TableFromTemplate() function for an explanation.
	(This _TableFromTemplate() and SplitTemplate() are the workhorse functions
	for TableFromTemplate().)

	"""

	output = SplitTemplateMulti(ltmpl, delimiters[d], delete_blanks[d])

	if d > 0:
	i = 0
	while i < len(output):
	output[i] = _TableFromTemplate(d-1,
	output[i],
	delimiters,
	delete_blanks)
	# Delete empty LISTS?
	if (delete_blanks[d] and
	hasattr(output[i], '__len__') and
	(len(output[i]) == 0)):
	del output[i]
	else:
	i += 1

	return output


	def TableFromTemplate(ltmpl, delimiters, delete_blanks=True):
	"""
	This function can be used to split a template
	(a list containing TextBlocks and VarRefs) into a table
	into a multidimensional table, with an arbitrary number of dimensions.

	Arguments:

	ltmpl

	An alternating list of TextBlocks and VarRefs containing
	the contents of this text template.

	delimiters

	The user must supply a list or tuple of delimiters: one delimiter for
	each dimension in the table, with low-priority delimiters
	(such as spaces ' ') appearing first, and higher-priority delimiters
	(sich as newlines '\n') appearing later on in the list.
	This function will divide the entire "ltmpl" into an n-dimensional
	table. Initially the text is split into a list of text using the
	highest-priority delimiter. Then each entry in the resulting list is
	split into another list according to the next highest-priority delimiter.
	This continues until all of the delimiters are used up and an
	n-dimensional list-of-lists is remaining.

	delete_blanks

	The optional "delete_blanks" argument can be used to indicate whether
	or not to delete blank entries in the table (which occur as a result
	of placing two delimiters next to each other). It should be either
	None (default), or it should be an array of booleans matching the
	size of the "delimiters" argument. This allows the caller to customize
	the merge settings separately for each dimension (for example: to allow
	merging of whitespace within a line, without ignoring blank lines).


	---- Details: ----

	1) Multi-character delimiters ARE allowed (like '\n\n').

	2) If a delimiter in the "delimiters" argument is not a string
	but is a tuple (or a list) of strings, then the text is split according
	to any of the delimiters in that tuple/list (starting from the last entry).
	This way, users can use this feature to split text according to multiple
	different kinds of whitespace characters (such as ' ' and '\t'), for
	example, buy setting delimiters[0] = (' ','\t'). If, additionally,
	delete_blanks[0] == True, then this will cause this function to
	divide text in without regard to whitespace on a given line (for example).

	Detailed example:

	table2D = TableFromTmplList(ltmpl,
	delimiters = ((' ','\t'), '\n'),
	delete_blanks = (True, False))

	This divides text in a similar way that the "awk" program does by default,
	ie, by ignoring various kinds of whitespace between text fields, but NOT
	ignoring blank lines.

	3) Any text contained in variable-names is ignored.

	"""

	# Make a copy of ltmpl
	# (The workhorse function "_TableFromTemplate()" makes in-place changes to
	# its "ltmpl" argument. I don't want to modify "ltmpl", so I make a copy
	# of it before I invoke "_TableFromTemplate()" on it.)

	output = [ltmpl[i] for i in range(0, len(ltmpl))]

	d = len(delimiters) - 1
	output = _TableFromTemplate(d, output, delimiters, delete_blanks)
	return output










	class TemplateLexer(TtreeShlex):
	""" This class extends the standard python lexing module, shlex, adding a
	new member function (ReadTemplate()), which can read in a block of raw text,
	(halting at an (non-escaped) terminal character), and split the text into
	alternating blocks of text and variables. (As far as this lexer is
	concerned, "variables" are simply tokens preceeded by $ or @ characters,
	and surrounded by optional curly-brackets {}.)

	"""
	def __init__(self,
	instream=None,
	infile=None,
	posix=False):
	TtreeShlex.__init__(self, instream, infile, posix)
	self.var_delim = '$@' #characters which can begin a variable name
	self.var_open_paren = '{' #optional parenthesis surround a variable
	self.var_close_paren = '}' #optional parenthesis surround a variable
	self.newline = '\n'
	self.comment_skip_var = '#'

	# Which characters belong in words?
	#
	# We want to allow these characters:
	# ./$@&%^!*~`-_:;?<>[]()
	# to appear inside the tokens that TtreeShlex.get_token()
	# retrieves (TtreeShlex.get_token() is used to read class
	# names, and instance names, and variable names)
	#
	# settings.lex.wordchars+='./$@&%^!*~`-_+:;?<>[]' #Allow these chars
	#
	# Ommisions:
	# Note: I left out quotes, whitespace, comment chars ('#'), and escape
	# characters ('\\') because they are also dealt with separately.
	# Those characters should not overlap with settings.lex.wordchars.
	#
	# Enabling unicode support requires that we override this choice
	# by specifying "lex.wordterminators" instead of "wordchars".
	#
	# lex.wordterminators should be the (printable) set inverse of lex.wordchars
	# I'm not sure which ascii characters are NOT included in the string above
	# (We need to figure that out, and put them in settings.lex.wordterminators)
	# To figure that out, uncomment the 8 lines below:
	#
	#self.wordterminators=''
	#for i in range(0,256):
	# c = chr(i)
	# if c not in self.wordchars:
	# self.wordterminators += c
	#sys.stderr.write('-------- wordterminators = --------\n')
	#sys.stderr.write(self.wordterminators+'\n')
	#sys.stderr.write('-----------------------------------\n')
	#
	# Here is the result:
	self.wordterminators = '(),={\|}' + \
	self.whitespace + \
	self.quotes + \
	self.escape + \
	self.commenters
	# Note:
	# self.whitespace = ' \t\r\f\n'
	# self.quotes = '\'"'
	# self.escape = '\\'
	# self.commenters = '#'

	self.source_triggers=set(['include','import'])
	self.source_triggers_x=set(['import'])




	def GetSrcLoc(self):
	return OSrcLoc(self.infile, self.lineno)


	def ReadTemplate(self,
	simplify_output=False,
	terminators='}',
	other_esc_chars='{',
	keep_terminal_char = True):
	"""
	ReadTemplate() reads a block of text (between terminators)
	and divides it into variables (tokens following a '$' or '@' character)
	and raw text. This is similar to pythons string.Template(),
	however it reads from streams (files), not strings, and it allows use
	of more complicated variable names with multiple variable delimiters
	(eg '$' and '@').
	This readline()-like member function terminates when reaching a
	user-specified terminator character character (second argument),
	or when variable (eg: "$var"$ is encountered). The result is
	a list of variable-separated text-blocks (stored in the first
	argument). For example, the string:
	"string with $var1 and $var2 variables.}" contains:
	"string with ",
	$var1,
	" and ",
	$var2,
	" variables.}"
	This simplifies the final process of rendering
	(substituting text into) the text blocks later on.
	Output:
	This function returns a list of (alternating) blocks of
	text, and variable names. Each entry in the list is either:
	1) a text block:
	Raw text is copied from the source, verbatim, along with
	some additional data (filename and line numbers), to
	help retroactively identify where the text came from
	(in case a syntax error in the text is discovered later).
	In this case, the list entry is stored as a list
	The format (TextBlock) is similar to:
	[text_string, ((filenameA,lineBegin), (filenameB,lineEnd))],
	where the tuples, (filenameA,lineBegin) and (filenameB,lineEnd)
	denote the source file(s) from which the text was read, and
	line number at the beginning and ending of the text block.
	(This information is useful for generating helpful error
	messages. Note that the "TtreeShlex" class allows users to
	combine multiple files transparently into one stream using
	the "source" (or "sourcehook()") member. For this reason, it
	is possible, although unlikely, that the text-block
	we are reading could span multiple different files.)
	2) a variable (for example "$var" or "${var}"):
	In this case, the list entry is stored in the "VarRef" format
	which is essentialy shown below:
	[[var_prefix, var_nptr, var_suffix], (filename,lineno)]
	where var_prefix and var_suffix are strings containing brackets
	and other text enclosing the variable name (and may be empty).

	As an example, we consider a file named "datafile" which
	contains the text containing 2 text blocks and 1 variable:
	"some\n text\n before ${var}. Text after\n".
	ReadTemplate() will read this and return a list with 3 entries:
	[ ['some\n text\n before', (('datafile', 1), ('datafile', 3))],
	[['${', 'var', '}'], ('datafile', 3, 3)],
	['Text after\n', (('datafile', 3), ('datafile', 4))] ]

	Note that while parsing the text, self.lineno counter is
	incremented whenever a newline character is encountered.
	(Also: Unlike shlex.get_token(), this function does not
	delete commented text, or insert text from other files.)

	Exceptional Cases:
	Terminator characters are ignored if they are part of a variable
	reference. (For example, the '}' in "${var}", is used to denote a
	bracketed variable, and does not cause ReadTemplate() to stop reading)
	OR if they are part of a two-character escape sequence
	(for example, '}' in "\}" does not cause terminate parsing).
	In that case, the text is considered normal text. (However the
	'\' character is also stripped out. It is also stripped out if it
	preceeds any characters in "other_esc_chars", which is
	the second argument. Otherwise it is left in the text block.)

	"""
	#print(' ReadTemplate('+terminators+') invoked at '+self.error_leader())

	# The main loop of the parser reads only one variable at time.
	# The following variables keep track of where we are in the template.
	reading_var=False # Are we currently reading in the name of a variable?

	prev_char_delim=False #True iff we just read a var_delim character like '$'
	escaped_state=False #True iff we just read a (non-escaped) esc character '\'
	commented_state=False #True iff we are in a region of text where vars should be ignored
	var_paren_depth=0 # This is non-zero iff we are inside a
	# bracketed variable's name for example: "${var}"
	var_terminators = self.whitespace + self.newline + self.var_delim + '{}'

	tmpl_list = [] # List of alternating tuples of text_blocks and
	# variable names (see format comment above)
	# This list will be returned to the caller.

	#sys.stderr.write('report_progress='+str(report_progress))

	prev_filename = self.infile
	prev_lineno = self.lineno
	var_prefix = ''
	var_descr_plist = []
	var_suffix = ''
	text_block_plist = []

	done_reading = False

	while not done_reading:

	terminate_text = False
	terminate_var = False
	#delete_prior_escape = False

	nextchar = self.read_char()


	#print(' ReadTemplate() nextchar=\''+nextchar+'\' at '+self.error_leader()+' esc='+str(escaped_state)+', pvar='+str(prev_char_delim)+', paren='+str(var_paren_depth))



	# Count newlines:
	if nextchar in self.newline:
	commented_state = False
	self.lineno += 1

	elif ((nextchar in self.comment_skip_var) and
	(not escaped_state)):
	commented_state = True

	# Check for end-of-file:
	if nextchar == '':

	if escaped_state:
	raise InputError('Error: in '+self.error_leader()+'\n\n'
	'No escaped character.')
	if reading_var:
	terminate_var = True
	else:
	terminate_text = True

	done_reading = True


	# --- Now process the character: ---


	# What we do next depends on which "mode" we are in.
	# If we are reading a regular text block (reading_var == False),
	# then we keep appending characters onto the end of "text_block",
	# checking for terminal characters, or variable delimiters.
	# If we are reading a variable name (reading_var == True),
	# then we append characters to the end of "var_descr_plist[]",
	# checking for variable terminator characters, as well as
	# parenthesis (some variables are surrounded by parenthesis).

	elif reading_var:

	if nextchar in terminators:
	#sys.stdout.write(' ReadTemplate() readmode found terminator.\n')
	if escaped_state:
	# In this case, the '\' char was only to prevent terminating
	# string prematurely, so delete the '\' character.
	#delete_prior_escape = True
	if not (nextchar in self.var_close_paren):
	del var_descr_plist[-1]
	var_descr_plist.append(nextchar)

	elif not ((var_paren_depth>0) and (nextchar in self.var_close_paren)):
	terminate_var = True
	done_reading = True

	if nextchar in self.var_open_paren: # eg: nextchar == '{'
	#sys.stdout.write(' ReadTemplate() readmode found {.\n')
	if escaped_state:
	# In this case, the '\' char was only to prevent
	# interpreting '{' as a variable prefix
	#delete_prior_escape=True # so delete the '\' character
	del var_descr_plist[-1]
	var_descr_plist.append(nextchar)
	else:
	# "${var}" is a valid way to refer to a variable
	if prev_char_delim:
	var_prefix += nextchar
	var_paren_depth = 1
	# "${{var}}" is also a valid way to refer to a variable,
	# (although strange), but "$va{r}" is not.
	# Parenthesis (in bracketed variable names) must
	# immediately follow the '$' character (as in "${var}")
	elif var_paren_depth > 0:
	var_paren_depth += 1

	elif nextchar in self.var_close_paren:
	#sys.stdout.write(' ReadTemplate() readmode found }.\n')
	if escaped_state:
	# In this case, the '\' char was only to prevent
	# interpreting '}' as a variable suffix,
	#delete_prior_escape=True #so skip the '\' character
	if (nextchar not in terminators):
	del var_descr_plist[-1]
	var_descr_plist.append(nextchar)
	else:
	if var_paren_depth > 0:
	var_paren_depth -= 1
	if var_paren_depth == 0:
	var_suffix = nextchar
	terminate_var = True

	elif nextchar in var_terminators:
	#sys.stdout.write(' ReadTemplate() readmode found var_terminator \"'+nextchar+'\"\n')
	if (escaped_state or (var_paren_depth>0)):
	# In this case, the '\' char was only to prevent
	# interpreting nextchar as a variable terminator
	#delete_prior_escape = True # so skip the '\' character
	del var_descr_plist[-1]
	var_descr_plist.append(nextchar)
	else:
	terminate_var = True

	elif nextchar in self.var_delim: # such as '$'
	#sys.stdout.write(' ReadTemplate() readmode found var_delim.\n')
	if escaped_state:
	# In this case, the '\' char was only to prevent
	# interpreting '$' as a new variable name
	#delete_prior_escape = True # so skip the '\' character
	del var_descr_plist[-1]
	var_descr_plist.append(nextchar)
	else:
	prev_var_delim = True
	# Then we are processing a new variable name
	terminate_var = True
	else:
	var_descr_plist.append(nextchar)
	prev_char_delim = False


	else: # begin else clause for "if reading_var:"

	# Then we are reading a text_block

	if nextchar in terminators:
	if escaped_state:
	# In this case, the '\' char was only to prevent terminating
	# string prematurely, so delete the '\' character.
	#delete_prior_escape = True
	del text_block_plist[-1]
	text_block_plist.append(nextchar)
	elif commented_state:
	text_block_plist.append(nextchar)
	else:
	terminate_text = True
	done_reading = True

	elif nextchar in self.var_delim: # such as '$'
	if escaped_state:
	# In this case, the '\' char was only to prevent
	# interpreting '$' as a variable prefix.
	#delete_prior_escape=True #so delete the '\' character
	del text_block_plist[-1]
	text_block_plist.append(nextchar)
	elif commented_state:
	text_block_plist.append(nextchar)
	else:
	prev_char_delim = True
	reading_var = True
	var_paren_depth = 0
	terminate_text = True
	else:
	text_block_plist.append(nextchar)
	#TO DO: use "list_of_chars.join()" instead of '+='
	prev_char_delim = False # the previous character was not '$'


	# Now deal with "other_esc_chars"
	#if escaped_state and (nextchar in other_esc_chars):

	if escaped_state and (nextchar in other_esc_chars):
	if reading_var:
	#sys.stdout.write(' ReadTemplate: var_descr_str=\''+''.join(var_descr_plist)+'\'\n')
	assert(var_descr_plist[-2] in self.escape)
	del var_descr_plist[-2]
	else:
	#sys.stdout.write(' ReadTemplate: text_block=\''+''.join(text_block_plist)+'\'\n')
	assert(text_block_plist[-2] in self.escape)
	del text_block_plist[-2]


	if terminate_text:
	#sys.stdout.write('ReadTemplate() appending: ')
	#sys.stdout.write(text_block)

	#tmpl_list.append( [text_block,
	# ((prev_filename, prev_lineno),
	# (self.infile, self.lineno))] )

	if simplify_output:
	tmpl_list.append(''.join(text_block_plist))
	else:
	tmpl_list.append(TextBlock(''.join(text_block_plist),
	OSrcLoc(prev_filename, prev_lineno)))
	#, OSrcLoc(self.infile, self.lineno)))
	if not done_reading:
	# The character that ended the text block
	# was a variable delimiter (like '$'), in which case
	# we should put it (nextchar) in the variable's prefix.
	var_prefix = nextchar
	else:
	var_prefix = ''
	var_descr_plist = []
	var_suffix = ''
	prev_filename = self.infile
	prev_lineno = self.lineno
	del text_block_plist
	text_block_plist = []
	#gc.collect()


	elif terminate_var:
	# Print an error if we terminated in the middle of
	# an incomplete variable name:
	if prev_char_delim:
	raise InputError('Error: near '+self.error_leader()+'\n\n'
	'Null variable name.')
	if var_paren_depth > 0:
	raise InputError('Error: near '+self.error_leader()+'\n\n'
	'Incomplete bracketed variable name.')

	var_descr_str = ''.join(var_descr_plist)

	# Now check for variable format modifiers,
	# like python's ".rjust()" and ".ljust()".
	# If present, then put these in the variable suffix.
	if ((len(var_descr_plist)>0) and (var_descr_plist[-1]==')')):
	#i = len(var_descr_plist)-1
	#while i >= 0:
	# if var_descr_plist[i] == '(':
	# break
	# i -= 1
	i = var_descr_str.rfind('(')
	if (((i-6) >= 0) and
	((var_descr_str[i-6:i] == '.rjust') or
	(var_descr_str[i-6:i] == '.ljust'))):
	var_suffix =''.join(var_descr_plist[i-6:])+var_suffix
	#var_descr_plist = var_descr_plist[:i-6]
	var_descr_str = var_descr_str[:i-6]

	# Process any special characters in the variable name
	var_descr_str = EscCharStrToChar(var_descr_str)

	#tmpl_list.append( [[var_prefix, var_descr_str, var_suffix],
	# (self.infile, self.lineno)] )
	if simplify_output:
	tmpl_list.append(var_prefix + var_descr_str + var_suffix)
	else:
	tmpl_list.append( VarRef(var_prefix, var_descr_str, var_suffix,
	OSrcLoc(self.infile, self.lineno)) )

	#if report_progress:
	#sys.stderr.write(' parsed variable '+var_prefix+var_descr_str+var_suffix+'\n')

	#sys.stdout.write('ReadTemplate() appending: ')
	#print(var_prefix + var_descr_str + var_suffix)

	del var_descr_plist
	del var_descr_str

	prev_filename = self.infile
	prev_lineno = self.lineno
	var_prefix = ''
	var_descr_plist = []
	var_suffix = ''
	# Special case: Variable delimiters like '$'
	# terminate the reading of variables,
	# but they also signify that a new
	# variable is being read.
	if nextchar in self.var_delim:
	# Then we are processing a new variable name
	prev_var_delim = True
	reading_var = True
	var_paren_depth = 0
	var_prefix = nextchar

	elif nextchar in self.var_close_paren:
	del text_block_plist
	text_block_plist = []
	#gc.collect()
	prev_var_delim = False
	reading_var = False

	else:
	# Generally, we don't want to initialize the next text block
	# with the empty string. Consider that whatever character
	# caused us to stop reading the previous variable and append
	# it to the block of text that comes after.
	del text_block_plist
	text_block_plist = [nextchar]
	#gc.collect()
	prev_var_delim = False
	reading_var = False


	# If we reached the end of the template (and the user requests it),
	# then the terminal character can be included in the list
	# of text_blocks to be returned to the caller.
	if done_reading and keep_terminal_char:
	#sys.stdout.write('ReadTemplate() appending: \''+nextchar+'\'\n')
	# Here we create a new text block which contains only the
	# terminal character (nextchar).
	#tmpl_list.append( [nextchar,
	# ((self.infile, self.lineno),
	# (self.infile, self.lineno))] )
	if simplify_output:
	tmpl_list.append(nextchar)
	else:
	tmpl_list.append(TextBlock(nextchar,
	OSrcLoc(self.infile, self.lineno)))
	#, OSrcLoc(self.infile, self.lineno)))


	if escaped_state:
	escaped_state = False
	else:
	if nextchar in self.escape:
	escaped_state = True

	#print("* TMPL_LIST0 = *", tmpl_list)
	return tmpl_list # <- return value stored here




	def GetParenExpr(self, prepend_str='', left_paren='(', right_paren=')'):
	""" GetParenExpr() is useful for reading in strings
	with nested parenthesis and spaces.
	This function can read in the entire string:

	.trans(0, 10.0sin(30), 10.0cos(30))

	(Because I was too lazy to write this correctly...)
	Spaces are currently stripped out of the expression.
	(...unless surrounded by quotes) The string above becomes:

	".trans(0,10.0sin(30),10.0cos(30))"

	Sometimes the caller wants to prepend some text to the beginning
	of the expression (which may contain parenthesis). For this
	reason, an optional first argument ("prepend_str") can be
	provided. By default it is empty.

	"""
	orig_wordterm = self.wordterminators
	self.wordterminators = self.wordterminators.replace(left_paren,'').replace(right_paren,'')

	token = self.get_token()
	if ((token == '') or
	(token == self.eof)):
	return prepend_str


	expr_str = prepend_str + token

	#if (expr_str.find(left_paren) == -1):
	# raise InputError('Error near or before '+self.error_leader()+'\n'
	# 'Expected an open-paren (\"'+prepend_str+left_paren+'\") before this point.\n')
	# return expr_str

	paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren)
	while ((len(expr_str) == 0) or (paren_depth > 0)):
	token = self.get_token()
	if ((type(token) is not str) or
	(token == '')):
	raise InputError('Error near or before '+self.error_leader()+'\n'
	'Invalid expression: \"'+expr_str+'\"')
	expr_str += token
	paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren)
	if (paren_depth != 0):
	raise InputError('Error near or before '+self.error_leader()+'\n'
	'Invalid expression: \"'+expr_str+'\"')
	self.wordterminators = orig_wordterm
	return expr_str










	if __name__ == '__main__':
	if len(sys.argv) == 1:
	lexer = TtreeShlex()
	else:
	file = sys.argv[1]
	lexer = TtreeShlex(open(file), file)
	while 1:
	tt = lexer.get_token()
	if tt:
	print("Token: " + repr(tt))
	else:
	break

ttree_lex.pyNo OneTemporaryActions

File Metadata

ttree_lex.pyView Options

Event Timeline

ttree_lex.py
No OneTemporary
Actions

ttree_lex.py
View Options