io_xml.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, Jul 16, 14:00

io_xml.py
View Options

	"""Contains the functions used to read the input file and print the checkpoint
	files with xml formatting.

	Copyright (C) 2013, Joshua More and Michele Ceriotti

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.


	Functions:
	xml_node: Class to handle a particular xml tag.
	xml_handler: Class giving general xml data reading methods.
	xml_parse_string: Parses a string made from a section of a xml input file.
	xml_parse_file: Parses an entire xml input file.
	read_type: Reads a string and outputs data of a specified type.
	read_float: Reads a string and outputs a float.
	read_int: Reads a string and outputs an integer.
	read_bool: Reads a string and outputs a boolean.
	read_list: Reads a string and outputs a list.
	read_array: Reads a string and outputs an array.
	read_tuple: Reads a string and outputs a tuple.
	read_dict: Reads a string and outputs a dictionary.
	write_type: Writes a string from data of a specified type.
	write_list: Writes a string from a list.
	write_tuple: Writes a string from a tuple.
	write_float: Writes a string from a float.
	write_bool: Writes a string from a boolean.
	write_dict: Writes a string from a dictionary.
	"""

	# Public names exported by a star-import of this module: the xml node and
	# handler classes, the two parse helpers, and the read_*/write_* converters.
	__all__ = ['xml_node', 'xml_handler', 'xml_parse_string', 'xml_parse_file',
	'read_type', 'read_float', 'read_int', 'read_bool', 'read_list',
	'read_array', 'read_tuple', 'read_dict', 'write_type', 'write_list',
	'write_tuple', 'write_float', 'write_bool', 'write_dict']

	from xml.sax import parseString, parse
	from xml.sax.handler import ContentHandler
	import numpy as np
	import string

	class xml_node(object):
		"""Container for the contents of a single xml tag.

		A tag is generally written as
		<tag_name attribs="attrib_data"> main_data </tag_name>. The tag name, the
		attribute data and the enclosed data are kept in separate attributes so
		that objects with the appropriate names and data can be built from them.

		Attributes:
			attribs: The attribute data for the tag.
			fields: The data found between the start and end tags.
			name: The tag name.
		"""

		def __init__(self, attribs=None, name="", fields=None):
			"""Initialises xml_node.

			Args:
				attribs: An optional dictionary giving attribute data. A fresh
					empty dictionary is used when omitted.
				name: An optional string giving the tag name. Defaults to ''.
				fields: An optional container holding all the data between the
					start and end tags, including information about other nodes.
					A fresh empty list is used when omitted.
			"""

			# None is the sentinel so that instances never share a default container
			self.attribs = {} if attribs is None else attribs
			self.name = name
			self.fields = [] if fields is None else fields


	class xml_handler(ContentHandler):
		"""Class giving general xml reading methods.

		Implements the xml.sax ContentHandler callbacks to collect the different
		kinds of data. Keeps track of the hierarchical nature of an xml file by
		recording the level of nesting, so that the correct data and attributes
		can be associated with the correct tag name.

		Attributes:
			root: An xml_node object for the root node.
			open: The list of the tags that the parser is currently between the
				start and end tags of.
			level: The level of nesting that the parser is currently at.
			buffer: A list of the data found between the tags at the different
				levels of nesting.
		"""

		def __init__(self):
			"""Initialises xml_handler."""

			# let the base class set up its own state first
			ContentHandler.__init__(self)

			# root xml node with all the data
			self.root = xml_node(name="root", fields=[])
			self.open = [self.root]
			# current level of the hierarchy
			self.level = 0
			# Holds all the data between each of the tags.
			# If level = 1, then buffer[0] holds all the data collected between the
			# root tags, and buffer[1] holds all the data collected between the
			# first child tag.
			self.buffer = [[""]]

		def startElement(self, name, attrs):
			"""Reads an opening tag.

			Adds the opening tag to the list of open tags, adds a new space in
			the buffer, reads the appropriate attributes and adds a new level to
			the hierarchy.

			Args:
				name: The tag name.
				attrs: The attribute data.
			"""

			# copy the attributes into a plain dictionary and create a new node
			attribs = dict((k, attrs[k]) for k in attrs.keys())
			newnode = xml_node(attribs=attribs, name=name, fields=[])
			# adds it to the list of open nodes
			self.open.append(newnode)
			# adds it to the list of fields of the parent tag
			self.open[self.level].fields.append((name, newnode))
			# gets ready to read new data
			self.buffer.append([""])
			self.level += 1

		def characters(self, data):
			"""Reads data.

			Adds the data to the buffer of the current level of the hierarchy.
			Data is read as a string, and needs to be converted to the required
			type later.

			Args:
				data: The data to be read.
			"""

			self.buffer[self.level].append(data)

		def endElement(self, name):
			"""Reads a closing tag.

			Once all the data has been read, and the closing tag found, the
			buffer is read into the appropriate field.

			Args:
				name: The tag name. Not used: the node being closed is the one at
					the current nesting level.
			"""

			# all the text found between the tags is stored in the appropriate
			# xml_node object, under the key "_text"
			text = ''.join(self.buffer[self.level])
			self.open[self.level].fields.append(("_text", text))
			# 'closes' the xml_node object, as we are no longer within its tags, so
			# there is no more data to be added to it.
			# Note that the xml_node is still held within the parent tag, so we
			# no longer need to keep it in the list of open nodes.
			self.buffer.pop(self.level)
			self.open.pop(self.level)
			self.level -= 1

	def xml_parse_string(buf):
		"""Parses xml held in a string, such as a section of an input file.

		Args:
			buf: A string in correct xml format.

		Returns:
			The xml_node for the root node built while parsing the string.
		"""

		handler = xml_handler()
		# the handler collects the parsed data into its root node
		parseString(buf, handler)
		root = handler.root
		return root

	def xml_parse_file(stream):
		"""Parses an entire xml input file.

		Args:
			stream: The xml formatted source, passed unchanged to xml.sax.parse.

		Returns:
			The xml_node for the root node built while parsing the file.
		"""

		handler = xml_handler()
		# the handler collects the parsed data into its root node
		parse(stream, handler)
		root = handler.root
		return root

	def read_type(type, data):
		"""Converts a string into a value of the requested type.

		The reader registered for the type in readtype_funcs is applied to the
		string, and its result is then passed through the type itself.

		Args:
			type: The data type of the target container.
			data: The string to be read in.

		Raises:
			TypeError: Raised if no reader has been registered for the given
				type.

		Returns:
			An object of type type.
		"""

		reader = readtype_funcs.get(type)
		if reader is None:
			raise TypeError("Conversion not available for given type")
		return type(reader(data))

	def read_float(data):
		"""Converts a string into a float.

		Args:
			data: The string to be read in.

		Raises:
			ValueError: Raised if the string cannot be interpreted as a float.

		Returns:
			A float.
		"""

		value = float(data)
		return value

	def read_int(data):
		"""Converts a string into an integer.

		Args:
			data: The string to be read in.

		Raises:
			ValueError: Raised if the string cannot be interpreted as an integer.

		Returns:
			An integer.
		"""

		value = int(data)
		return value

	def read_bool(data):
		"""Converts a string into a boolean.

		The string is compared, ignoring case and surrounding whitespace, with
		'true' and 'false', and the matching boolean is returned.

		Args:
			data: The string to be read in.

		Raises:
			ValueError: Raised if the string is neither 'true' nor 'false'.

		Returns:
			A boolean.
		"""

		token = data.strip().upper()
		if token == "TRUE":
			return True
		if token == "FALSE":
			return False
		raise ValueError(data + " does not represent a bool value")

	def read_list(data, delims="[]", split=",", strip=" \n\t'"):
		"""Reads a formatted string and outputs a list.

		The string must be formatted in the correct way. The list starts at the
		first occurrence of delims[0] and ends at the first occurrence of
		delims[1] after it; any text outside the delimiters is ignored. The
		elements are separated by the character split, and characters in strip
		found at the beginning or end of each element are removed. The standard
		list format is of the form '[array[0], array[1],..., array[n]]', which is
		used for actual lists. Other formats are used for tuples and
		dictionaries. Nested lists are not supported.

		Args:
			data: The string to be read in.
			delims: A string of two characters giving the first and last
				character of the list format. '[]' by default.
			split: The character between different elements of the list format.
				',' by default.
			strip: Characters to be removed from the beginning and end of each
				element. Spaces, newlines, tabs and single quotes by default.

		Raises:
			ValueError: Raised if the input data is not of the correct format.

		Returns:
			A list of strings.
		"""

		try:
			begin = data.index(delims[0])
			# Only accept a closing delimiter that comes after the opening one;
			# otherwise a stray closing character earlier in the string would
			# give an empty slice and silently return an empty list.
			end = data.index(delims[1], begin + 1)
		except ValueError:
			raise ValueError("Error in list syntax: could not locate delimiters")

		rlist = [item.strip(strip) for item in data[begin + 1:end].split(split)]

		# handles empty lists correctly: splitting an empty string gives ['']
		if rlist == [""]:
			return []

		return rlist

	def read_array(dtype, data):
		"""Reads a formatted string and outputs an array.

		The expected format is that of a standard python list, which is
		[array[0], array[1], ... , array[n]]. Note the use of comma separators,
		and the use of square brackets.

		Args:
			dtype: The data type of the elements of the target array.
			data: The string to be read in.

		Raises:
			ValueError: Raised if the input data is not of the correct format.

		Returns:
			An array of data type dtype.
		"""

		# split into strings first, then convert each element with read_type
		elements = [read_type(dtype, item) for item in read_list(data)]
		return np.array(elements, dtype)

	def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
		"""Reads a formatted string and outputs a tuple.

		The format is as for standard python tuples, which is
		(tuple[0], tuple[1], ... , tuple[n]). Note the comma separators, and the
		use of brackets.

		Args:
			data: The string to be read in.
			delims: A string of two characters giving the first and last
				character of the tuple format. '()' by default.
			split: The character between different elements of the tuple format.
				',' by default.
			strip: Characters to be removed from the beginning and end of each
				element. Spaces, newlines, tabs and single quotes by default.
			arg_type: The callable used to convert each element string. The
				returned tuple holds the converted values. int by default.

		Raises:
			ValueError: Raised if the input data is not of the correct format.

		Returns:
			A tuple of elements of the specified data type.
		"""

		items = read_list(data, delims=delims, split=split, strip=strip)
		converted = [arg_type(item) for item in items]
		return tuple(converted)

	def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
		"""Reads a formatted string and outputs a dictionary.

		The format is as for standard python dictionaries, which is
		{keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}. Note
		the comma separators, and the use of curly brackets.

		Args:
			data: The string to be read in.
			delims: A string of two characters giving the first and last
				character of the dictionary format. '{}' by default.
			split: The character between different items of the dictionary
				format. ',' by default.
			key_split: The character between the key word and the value.
				':' by default.
			strip: Characters to be removed from the beginning and end of each
				key and value. ' \n\t' by default.

		Raises:
			ValueError: Raised if the input data is not of the correct format,
				or if an item does not split into exactly one key and one value.

		Returns:
			A dictionary of strings.
		"""

		rlist = read_list(data, delims=delims, split=split, strip=strip)

		rdict = {}
		for item in rlist:
			# Build a real list rather than using map(): on Python 3 map() is a
			# lazy iterator, which supports neither len() nor indexing.
			pair = [part.strip(strip) for part in item.split(key_split)]
			if len(pair) != 2:
				raise ValueError("Format for a key:value format is wrong for item " + item)
			rdict[pair[0]] = pair[1]

		return rdict

	# Dispatch table used by read_type: maps a target type to the function that
	# reads a string into a value of that type.
	# The str reader is string.strip where the string module still provides it;
	# that function is absent on Python 3, where str.strip is used instead so
	# that the module can still be imported.
	# NOTE(review): read_array takes (dtype, data), while read_type calls the
	# registered reader with the data only -- confirm how the np.ndarray entry is
	# meant to be used.
	readtype_funcs = {
		np.ndarray: read_array,
		dict: read_dict,
		float: read_float,
		int: read_int,
		bool: read_bool,
		str: getattr(string, "strip", str.strip),
		tuple: read_tuple,
		np.uint: read_int,
	}

	def write_type(type, data):
		"""Writes a formatted string from a value of a specified type.

		The writer registered for the type in writetype_funcs is applied to the
		value.

		Args:
			type: The data type of the value.
			data: The value to be written out.

		Raises:
			TypeError: Raised if no writer has been registered for the given
				type.

		Returns:
			A formatted string.
		"""

		writer = writetype_funcs.get(type)
		if writer is None:
			raise TypeError("Conversion not available for given type")
		return writer(data)

	def write_list(data, delims="[]"):
		"""Writes a formatted string from a list.

		The format of the output is as for a standard python list,
		[list[0], list[1],..., list[n]]. Note the space after the commas, and the
		use of square brackets. Each element is written with str().

		Args:
			data: The iterable of values to be written out.
			delims: An optional string of two characters giving the first and
				last character to be printed. Defaults to "[]".

		Returns:
			A formatted string.
		"""

		# Join the elements rather than appending ", " after each one and
		# stripping afterwards: stripping removes a set of characters, so it
		# would also eat a comma or space that ends the last element.
		return delims[0] + ", ".join(str(v) for v in data) + delims[1]

	def write_tuple(data):
		"""Writes a formatted string from a tuple.

		The format of the output is as for a standard python tuple,
		(tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and
		the use of brackets.

		Args:
			data: The tuple to be written out.

		Returns:
			A formatted string.
		"""

		# same layout as a list, only with round brackets as delimiters
		formatted = write_list(data, delims="()")
		return formatted

	def write_float(data):
		"""Writes a formatted string from a float.

		Floats are printed out in exponential format, to 8 decimal places, and
		padded on the left with spaces up to a width of 16 characters.

		For example 1.0 --> '  1.00000000e+00'

		Args:
			data: The value to be written out.

		Returns:
			A formatted string.
		"""

		layout = "%16.8e"
		return layout % data

	def write_bool(data):
		"""Writes a formatted string from a boolean.

		The string form of the value is cut to at most 5 characters and padded
		on the left with spaces to exactly 5 characters, so booleans are printed
		as either ' True' or 'False'.

		Args:
			data: The value to be written out.

		Returns:
			A formatted string.
		"""

		text = str(data)
		return text[:5].rjust(5)

	def write_dict(data, delims="{}"):
		"""Writes a formatted string from a dictionary.

		The format of the output is as for a standard python dictionary,
		{keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note
		the space after the commas, and the use of curly brackets. Keys and
		values are written with str(), in the iteration order of the dictionary.

		Args:
			data: The dictionary to be written out.
			delims: An optional string of two characters giving the first and
				last character to be printed. Defaults to "{}".

		Returns:
			A formatted string.
		"""

		# Join the items rather than appending ", " after each one and stripping
		# afterwards: stripping removes a set of characters, so it would also eat
		# a comma or space that ends the last value.
		items = [str(key) + ": " + str(data[key]) for key in data]
		return delims[0] + ", ".join(items) + delims[1]

	# Dispatch table used by write_type: maps a value type to the function that
	# writes a value of that type as a string.
	# The str writer is string.strip where the string module still provides it;
	# that function is absent on Python 3, where str.strip is used instead so
	# that the module can still be imported.
	writetype_funcs = {
		float: write_float,
		dict: write_dict,
		int: str,
		bool: write_bool,
		str: getattr(string, "strip", str.strip),
		tuple: write_tuple,
		np.uint: str,
	}

io_xml.pyNo OneTemporaryActions

File Metadata

io_xml.pyView Options

Event Timeline

io_xml.py
No OneTemporary
Actions

io_xml.py
View Options