Page MenuHomec4science

io_xml.py
No OneTemporary

File Metadata

Created
Sat, Oct 12, 16:23

io_xml.py

"""Contains the functions used to read the input file and print the checkpoint
files with xml formatting.
Copyright (C) 2013, Joshua More and Michele Ceriotti
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http.//www.gnu.org/licenses/>.
Functions:
xml_node: Class to handle a particular xml tag.
xml_handler: Class giving general xml data reading methods.
xml_parse_string: Parses a string made from a section of a xml input file.
xml_parse_file: Parses an entire xml input file.
read_type: Reads a string and outputs data of a specified type.
read_float: Reads a string and outputs a float.
read_int: Reads a string and outputs an integer.
read_bool: Reads a string and outputs a boolean.
read_list: Reads a string and outputs a list.
read_array: Reads a string and outputs an array.
read_tuple: Reads a string and outputs a tuple.
read_dict: Reads a string and outputs a dictionary.
write_type: Writes a string from data of a specified type.
write_list: Writes a string from a list.
write_tuple: Writes a string from a tuple.
write_float: Writes a string from a float.
write_bool: Writes a string from a boolean.
write_dict: Writes a string from a dictionary.
"""
__all__ = ['xml_node', 'xml_handler', 'xml_parse_string', 'xml_parse_file',
'read_type', 'read_float', 'read_int', 'read_bool', 'read_list',
'read_array', 'read_tuple', 'read_dict', 'write_type', 'write_list',
'write_tuple', 'write_float', 'write_bool', 'write_dict']
from xml.sax import parseString, parse
from xml.sax.handler import ContentHandler
import numpy as np
import string
class xml_node(object):
"""Class to handle a particular xml tag.
Tags are generally written in the form
<tag_name attribs="attrib_data"> main_data </tag_name>. This class holds
tag_name, attrib_data and main_data separately so they can be used to
create the objects with the appropriate names and data.
Attributes:
attribs: The attribute data for the tag.
fields: The rest of the data.
name: The tag name.
"""
def __init__(self, attribs=None, name="", fields=None):
"""Initialises xml_node.
Args:
attribs: An optional dictionary giving attribute data. Defaults to {}.
fields: An optional dictionary holding all the data between the start
and end tags, including information about other nodes.
Defaults to {}.
name: An optional string giving the tag name. Defaults to ''.
"""
if attribs is None:
attribs = {}
if fields is None:
fields = []
self.attribs = attribs
self.name = name
self.fields = fields
class xml_handler(ContentHandler):
"""Class giving general xml_reading methods.
Uses the standard python xml_reader to read the different kinds of data.
Keeps track of the heirarchial nature of an xml file by recording the level
of nesting, so that the correct data and attributes can be associated with
the correct tag name.
Attributes:
root: An xml_node object for the root node.
open: The list of the tags that the parser is currently between the start
and end tags of.
level: The level of nesting that the parser is currently at.
buffer: A list of the data found between the tags at the different levels
of nesting.
"""
def __init__(self):
"""Initialises xml_handler."""
#root xml node with all the data
self.root = xml_node(name="root", fields=[])
self.open = [self.root]
#current level of the hierarchy
self.level = 0
#Holds all the data between each of the tags.
#If level = 1, then buffer[0] holds all the data collected between the
#root tags, and buffer[1] holds all the data collected between the
#first child tag.
self.buffer = [[""]]
def startElement(self, name, attrs):
"""Reads an opening tag.
Adds the opening tag to the list of open tags, adds a new space in the
buffer, reads the appropriate attributes and adds a new level to the
heirarchy.
Args:
name: The tag_name.
attrs: The attribute data.
"""
#creates a new node
newnode = xml_node(attribs=dict((k,attrs[k]) for k in attrs.keys()), name=name, fields=[])
#adds it to the list of open nodes
self.open.append(newnode)
#adds it to the list of fields of the parent tag
self.open[self.level].fields.append((name,newnode))
#gets ready to read new data
self.buffer.append([""])
self.level += 1
def characters(self, data):
"""Reads data.
Adds the data to the buffer of the current level of the heirarchy.
Data is read as a string, and needs to be converted to the required
type later.
Args:
data: The data to be read.
"""
self.buffer[self.level].append(data)
def endElement(self, name):
"""Reads a closing tag.
Once all the data has been read, and the closing tag found, the buffer
is read into the appropriate field.
Args:
name: The tag_name.
"""
#all the text found between the tags stored in the appropriate xml_node
#object
self.buffer[self.level] = ''.join(self.buffer[self.level])
self.open[self.level].fields.append(("_text" , self.buffer[self.level]))
#'closes' the xml_node object, as we are no longer within its tags, so
#there is no more data to be added to it.
#Note that the xml_node is still held within the parent tag, so we
#no longer require this xml node object.
self.buffer.pop(self.level)
self.open.pop(self.level)
self.level -= 1
def xml_parse_string(buf):
"""Parses a string made from a section of a xml input file.
Args:
buf: A string in correct xml format.
Returns:
A xml_node for the root node of the file.
"""
myhandle = xml_handler()
parseString(buf, myhandle)
return myhandle.root
def xml_parse_file(stream):
"""Parses an entire xml input file.
Args:
stream: A string describing a xml formatted file.
Returns:
A xml_node for the root node of the file.
"""
myhandle = xml_handler()
parse(stream, myhandle)
return myhandle.root
def read_type(type, data):
"""Reads a string and outputs data of a specified type.
Args:
type: The data type of the target container.
data: The string to be read in.
Raises:
TypeError: Raised if it tries to read into a data type that has not been
implemented.
Returns:
An object of type type.
"""
if not type in readtype_funcs:
raise TypeError("Conversion not available for given type")
return type(readtype_funcs[type](data))
def read_float(data):
"""Reads a string and outputs a float.
Args:
data: The string to be read in.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
A float.
"""
return float(data)
def read_int(data):
"""Reads a string and outputs a integer.
Args:
data: The string to be read in.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
An integer.
"""
return int(data)
def read_bool(data):
"""Reads a string and outputs a boolean.
Takes a string of the form 'true' or 'false', and returns the appropriate
boolean.
Args:
data: The string to be read in.
Raises:
ValueError: Raised if the string is not 'true' or 'false'.
Returns:
A boolean.
"""
if data.strip().upper() == "TRUE":
return True
elif data.strip().upper() == "FALSE":
return False
else:
raise ValueError(data + " does not represent a bool value")
def read_list(data, delims="[]", split=",", strip=" \n\t'"):
"""Reads a formatted string and outputs a list.
The string must be formatted in the correct way.
The start character must be delimiters[0], the end character
must be delimiters[1] and each element must be split along
the character split. Characters at the beginning or
end of each element in strip are ignored. The standard list format is of the
form '[array[0], array[1],..., array[n]]', which is used for actual lists.
Other formats are used for tuples and dictionaries.
Args:
data: The string to be read in. '[]' by default.
delims: A string of two characters giving the first and last character of
the list format. ',' by default.
split: The character between different elements of the list format.
strip: Characters to be removed from the beginning and end of each
element. ' \n\t' by default.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
A list of strings.
"""
try:
begin = data.index(delims[0])
end = data.index(delims[1])
except ValueError:
raise ValueError("Error in list syntax: could not locate delimiters")
rlist = data[begin+1:end].split(split)
for i in range(len(rlist)):
rlist[i] = rlist[i].strip(strip)
# handles empty lists correctly
if len(rlist) == 1 and rlist[0] == "":
rlist = []
return rlist
def read_array(dtype, data):
"""Reads a formatted string and outputs an array.
The format is as for standard python arrays, which is
[array[0], array[1], ... , array[n]]. Note the use of comma separators, and
the use of square brackets.
Args:
data: The string to be read in.
dtype: The data type of the elements of the target array.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
An array of data type dtype.
"""
rlist = read_list(data)
for i in range(len(rlist)):
rlist[i] = read_type(dtype,rlist[i])
return np.array(rlist, dtype)
def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
"""Reads a formatted string and outputs a tuple.
The format is as for standard python tuples, which is
(tuple[0], tuple[1], ... , tuple[n]). Note the comma
separators, and the use of brackets.
Args:
data: The string to be read in.
delims: A string of two characters giving the first and last character of
the list format. ',' by default.
split: The character between different elements of the list format.
strip: Characters to be removed from the beginning and end of each
element. ' \n\t' by default.
arg_type: The strings in the input will be converted, and a tuple
of ar_type will be returned.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
A tuple of elements of the specified data type.
"""
rlist = read_list(data, delims=delims, split=split, strip=strip)
return tuple([arg_type(i) for i in rlist])
def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
"""Reads a formatted string and outputs a dictionary.
The format is as for standard python dictionaries, which is
{keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}. Note the
comma separators, and the use of curly brackets.
Args:
data: The string to be read in.
delims: A string of two characters giving the first and last character of
the list format. ',' by default.
split: The character between different elements of the list format.
key_split: The character between the key word and the value.
strip: Characters to be removed from the beginning and end of each
element. ' \n\t' by default.
Raises:
ValueError: Raised if the input data is not of the correct format.
Returns:
A dictionary of strings.
"""
rlist = read_list(data, delims=delims, split=split, strip=strip)
def mystrip(data):
return data.strip(strip)
rdict = {}
for s in rlist:
rtuple = map(mystrip,s.split(key_split))
if not len(rtuple) == 2:
raise ValueError("Format for a key:value format is wrong for item " + s)
rdict[rtuple[0]] = rtuple[1]
return rdict
readtype_funcs = {np.ndarray: read_array, dict: read_dict, float: read_float, int: read_int, bool: read_bool, str: string.strip, tuple: read_tuple, np.uint : read_int}
def write_type(type, data):
"""Writes a formatted string from a value of a specified type.
Args:
type: The data type of the value.
data: The value to be read in.
Raises:
TypeError: Raised if it tries to write from a data type that has not been
implemented.
Returns:
A formatted string.
"""
if not type in writetype_funcs:
raise TypeError("Conversion not available for given type")
return writetype_funcs[type](data)
def write_list(data, delims="[]"):
"""Writes a formatted string from a list.
The format of the output is as for a standard python list,
[list[0], list[1],..., list[n]]. Note the space after the commas, and the
use of square brackets.
Args:
data: The value to be read in.
delims: An optional string of two characters giving the first and last
character to be printed. Defaults to "[]".
Returns:
A formatted string.
"""
rstr = delims[0]
for v in data:
rstr += str(v) + ", "
rstr = rstr.rstrip(", ")
rstr += delims[1]
return rstr
def write_tuple(data):
"""Writes a formatted string from a tuple.
The format of the output is as for a standard python tuple,
(tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and the
use of brackets.
Args:
data: The value to be read in.
Returns:
A formatted string.
"""
return write_list(data, delims="()")
def write_float(data):
"""Writes a formatted string from a float.
Floats are printed out in exponential format, to 8 decimal places and
filling up any spaces under 16 not used with spaces.
For example 1.0 --> ' 1.00000000e+00'
Args:
data: The value to be read in.
Returns:
A formatted string.
"""
return "%16.8e" % (data)
def write_bool(data):
"""Writes a formatted string from a float.
Booleans are printed as a string of either ' true' or 'false'. Note that
both are printed out as exactly 5 characters.
Args:
data: The value to be read in.
Returns:
A formatted string.
"""
return "%5.5s" % (str(data))
def write_dict(data, delims="{}"):
"""Writes a formatted string from a dictionary.
The format of the output is as for a standard python dictionary,
{keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note the
space after the commas, and the use of curly brackets.
Args:
data: The value to be read in.
delims: An optional string of two characters giving the first and last
character to be printed. Defaults to "{}".
Returns:
A formatted string.
"""
rstr = delims[0]
for v in data:
rstr += str(v) + ": " + str(data[v]) + ", "
rstr = rstr.strip(", ")
rstr += delims[1]
return rstr
writetype_funcs = {float: write_float, dict: write_dict, int: str, bool: write_bool, str: string.strip, tuple: write_tuple, np.uint : str}

Event Timeline