Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91967845
io_xml.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sat, Nov 16, 05:48
Size
15 KB
Mime Type
text/x-python
Expires
Mon, Nov 18, 05:48 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22354440
Attached To
rLAMMPS lammps
io_xml.py
View Options
"""Contains the functions used to read the input file and print the checkpoint
files with xml formatting.
Copyright (C) 2013, Joshua More and Michele Ceriotti
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Functions:
xml_node: Class to handle a particular xml tag.
xml_handler: Class giving general xml data reading methods.
xml_parse_string: Parses a string made from a section of a xml input file.
xml_parse_file: Parses an entire xml input file.
read_type: Reads a string and outputs data of a specified type.
read_float: Reads a string and outputs a float.
read_int: Reads a string and outputs an integer.
read_bool: Reads a string and outputs a boolean.
read_list: Reads a string and outputs a list.
read_array: Reads a string and outputs an array.
read_tuple: Reads a string and outputs a tuple.
read_dict: Reads a string and outputs a dictionary.
write_type: Writes a string from data of a specified type.
write_list: Writes a string from a list.
write_tuple: Writes a string from a tuple.
write_float: Writes a string from a float.
write_bool: Writes a string from a boolean.
write_dict: Writes a string from a dictionary.
"""
# Public names exported by a star-import of this module.
__all__ = [
    # xml parsing
    'xml_node',
    'xml_handler',
    'xml_parse_string',
    'xml_parse_file',
    # string -> value readers
    'read_type',
    'read_float',
    'read_int',
    'read_bool',
    'read_list',
    'read_array',
    'read_tuple',
    'read_dict',
    # value -> string writers
    'write_type',
    'write_list',
    'write_tuple',
    'write_float',
    'write_bool',
    'write_dict',
]
from
xml.sax
import
parseString
,
parse
from
xml.sax.handler
import
ContentHandler
import
numpy
as
np
import
string
class xml_node(object):
    """Container for the pieces of a single xml tag.

    Tags are generally written in the form
    <tag_name attribs="attrib_data"> main_data </tag_name>. The tag name, the
    attribute data and the main data are kept in separate attributes so they
    can be used to create objects with the appropriate names and data.

    Attributes:
        attribs: The attribute data for the tag.
        fields: The rest of the data.
        name: The tag name.
    """

    def __init__(self, attribs=None, name="", fields=None):
        """Initialises xml_node.

        Args:
            attribs: Optional attribute data. A new empty dictionary is used
                when this is None.
            name: An optional string giving the tag name. Defaults to ''.
            fields: Optional container for all the data between the start and
                end tags, including information about other nodes. A new
                empty list is used when this is None.
        """

        self.name = name
        # None marks "not supplied", so every node gets its own fresh
        # container rather than sharing one default object.
        self.attribs = {} if attribs is None else attribs
        self.fields = [] if fields is None else fields
class xml_handler(ContentHandler):
    """Class giving general xml reading methods.

    Acts as the content handler for the standard python xml reader. Keeps
    track of the hierarchical nature of an xml file by recording the level of
    nesting, so that the correct data and attributes can be associated with
    the correct tag name.

    Attributes:
        root: An xml_node object for the root node.
        open: The list of the tags that the parser is currently between the
            start and end tags of.
        level: The level of nesting that the parser is currently at.
        buffer: A list of the data found between the tags at the different
            levels of nesting.
    """

    def __init__(self):
        """Initialises xml_handler."""

        # Run the base-class initialiser so that whatever state
        # ContentHandler sets up for itself exists on this instance too.
        # The explicit call form is used (rather than super()) so it also
        # works where ContentHandler is an old-style class.
        ContentHandler.__init__(self)

        # root xml node with all the data
        self.root = xml_node(name="root", fields=[])
        self.open = [self.root]
        # current level of the hierarchy
        self.level = 0
        # Holds all the data between each of the tags.
        # If level = 1, then buffer[0] holds all the data collected between
        # the root tags, and buffer[1] holds all the data collected between
        # the first child tag.
        self.buffer = [[""]]

    def startElement(self, name, attrs):
        """Reads an opening tag.

        Adds the opening tag to the list of open tags, adds a new space in
        the buffer, reads the appropriate attributes and adds a new level to
        the hierarchy.

        Args:
            name: The tag_name.
            attrs: The attribute data.
        """

        # creates a new node holding a plain-dict copy of the attributes
        newnode = xml_node(attribs=dict((k, attrs[k]) for k in attrs.keys()),
                           name=name, fields=[])
        # adds it to the list of open nodes
        self.open.append(newnode)
        # adds it to the list of fields of the parent tag; self.level has
        # not been incremented yet, so it still indexes the parent
        self.open[self.level].fields.append((name, newnode))
        # gets ready to read new data
        self.buffer.append([""])
        self.level += 1

    def characters(self, data):
        """Reads data.

        Adds the data to the buffer of the current level of the hierarchy.
        Data is read as a string, and needs to be converted to the required
        type later.

        Args:
            data: The data to be read.
        """

        self.buffer[self.level].append(data)

    def endElement(self, name):
        """Reads a closing tag.

        Once all the data has been read, and the closing tag found, the
        buffer is read into the appropriate field.

        Args:
            name: The tag_name.
        """

        # 'closes' the innermost tag: its text chunks and its node are
        # removed from the working stacks, as no more data can be added to
        # it. The node itself is still held in its parent's fields.
        text = ''.join(self.buffer.pop(self.level))
        node = self.open.pop(self.level)
        # all the text found between the tags is stored in the node under
        # the "_text" field
        node.fields.append(("_text", text))
        self.level -= 1
def xml_parse_string(buf):
    """Parses a string made from a section of a xml input file.

    A fresh xml_handler collects the content while parseString walks
    through buf.

    Args:
        buf: A string in correct xml format.

    Returns:
        A xml_node for the root node of the file.
    """

    handler = xml_handler()
    parseString(buf, handler)
    return handler.root
def xml_parse_file(stream):
    """Parses an entire xml input file.

    A fresh xml_handler collects the content while parse walks through
    stream.

    Args:
        stream: The xml formatted input, handed unchanged to xml.sax.parse.

    Returns:
        A xml_node for the root node of the file.
    """

    handler = xml_handler()
    parse(stream, handler)
    return handler.root
def read_type(type, data):
    """Reads a string and outputs data of a specified type.

    The reader registered for type in readtype_funcs is applied to data and
    its result is then passed through type itself.

    Args:
        type: The data type of the target container.
        data: The string to be read in.

    Raises:
        TypeError: Raised if it tries to read into a data type that has not
            been implemented.

    Returns:
        An object of type type.
    """

    if type in readtype_funcs:
        reader = readtype_funcs[type]
        return type(reader(data))

    raise TypeError("Conversion not available for given type")
def read_float(data):
    """Converts a string into a float.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A float.
    """

    value = float(data)
    return value
def read_int(data):
    """Converts a string into an integer.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An integer.
    """

    value = int(data)
    return value
def read_bool(data):
    """Converts a string into a boolean.

    The string is compared, ignoring case and surrounding whitespace, with
    'true' and 'false', and the matching boolean is returned.

    Args:
        data: The string to be read in.

    Raises:
        ValueError: Raised if the string is not 'true' or 'false'.

    Returns:
        A boolean.
    """

    token = data.strip().upper()
    if token == "TRUE":
        return True
    if token == "FALSE":
        return False

    raise ValueError(data + " does not represent a bool value")
def read_list(data, delims="[]", split=",", strip=" \n\t'"):
    """Reads a formatted string and outputs a list.

    The text between the first occurrence of delims[0] and the first
    occurrence of delims[1] is cut along the character split, and the
    characters in strip are removed from both ends of every element. The
    standard list format is of the form
    '[array[0], array[1],..., array[n]]', which is used for actual lists.
    Other formats are used for tuples and dictionaries.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the list format. '[]' by default.
        split: The character between different elements of the list format.
            ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. By default space, newline, tab and the single quote.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A list of strings.
    """

    try:
        start = data.index(delims[0])
        stop = data.index(delims[1])
    except ValueError:
        raise ValueError("Error in list syntax: could not locate delimiters")

    items = [item.strip(strip) for item in data[start + 1:stop].split(split)]

    # Splitting an empty body yields one empty string; report that as an
    # empty list.
    if items == [""]:
        return []
    return items
def read_array(dtype, data):
    """Reads a formatted string and outputs an array.

    The format is as for standard python arrays, which is
    [array[0], array[1], ... , array[n]]. Note the use of comma separators,
    and the use of square brackets.

    Args:
        dtype: The data type of the elements of the target array.
        data: The string to be read in.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        An array of data type dtype.
    """

    # Each element string is converted on its own before the array is built.
    values = [read_type(dtype, item) for item in read_list(data)]
    return np.array(values, dtype)
def read_tuple(data, delims="()", split=",", strip=" \n\t'", arg_type=int):
    """Reads a formatted string and outputs a tuple.

    The format is as for standard python tuples, which is
    (tuple[0], tuple[1], ... , tuple[n]). Note the comma separators, and the
    use of brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the tuple format. '()' by default.
        split: The character between different elements of the tuple
            format. ',' by default.
        strip: Characters to be removed from the beginning and end of each
            element. By default space, newline, tab and the single quote.
        arg_type: Callable applied to every element string; the returned
            tuple holds its results. int by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format.

    Returns:
        A tuple of elements of the specified data type.
    """

    items = read_list(data, delims=delims, split=split, strip=strip)
    return tuple(arg_type(item) for item in items)
def read_dict(data, delims="{}", split=",", key_split=":", strip=" \n\t"):
    """Reads a formatted string and outputs a dictionary.

    The format is as for standard python dictionaries, which is
    {keyword[0]: arg[0], keyword[1]: arg[1], ... , keyword[n]: arg[n]}. Note
    the comma separators, and the use of curly brackets.

    Args:
        data: The string to be read in.
        delims: A string of two characters giving the first and last
            character of the dictionary format. '{}' by default.
        split: The character between different key:value items. ',' by
            default.
        key_split: The character between the key word and the value. ':' by
            default.
        strip: Characters to be removed from the beginning and end of each
            item, key and value. ' \n\t' by default.

    Raises:
        ValueError: Raised if the input data is not of the correct format,
            including any item that does not split into exactly one key and
            one value.

    Returns:
        A dictionary of strings.
    """

    rdict = {}
    for item in read_list(data, delims=delims, split=split, strip=strip):
        # A real list is needed here: the result is measured with len() and
        # unpacked, which the iterator returned by map() on Python 3 does
        # not support.
        pair = [part.strip(strip) for part in item.split(key_split)]
        if len(pair) != 2:
            raise ValueError(
                "Format for a key:value format is wrong for item " + item)
        key, value = pair
        rdict[key] = value

    return rdict
def _read_str(data):
    """Returns data with leading and trailing whitespace removed.

    Replaces string.strip, which no longer exists on Python 3; calling the
    method on the value itself behaves the same way on Python 2.
    """

    return data.strip()


# Maps each supported target type to the function that reads a string into
# that type. read_type looks its converters up in this table.
readtype_funcs = {
    np.ndarray: read_array,
    dict: read_dict,
    float: read_float,
    int: read_int,
    bool: read_bool,
    str: _read_str,
    tuple: read_tuple,
    np.uint: read_int,
}
def write_type(type, data):
    """Writes a formatted string from a value of a specified type.

    The writer registered for type in writetype_funcs is applied to data.

    Args:
        type: The data type of the value.
        data: The value to be written out.

    Raises:
        TypeError: Raised if it tries to write from a data type that has not
            been implemented.

    Returns:
        A formatted string.
    """

    if type in writetype_funcs:
        return writetype_funcs[type](data)

    raise TypeError("Conversion not available for given type")
def write_list(data, delims="[]"):
    """Writes a formatted string from a list.

    The format of the output is as for a standard python list,
    [list[0], list[1],..., list[n]]. Note the space after the commas, and
    the use of square brackets.

    Args:
        data: The iterable of values to be written out. Each one is
            converted with str().
        delims: An optional string of two characters giving the first and
            last character to be printed. Defaults to "[]".

    Returns:
        A formatted string.
    """

    # Joining puts the separator only between elements, so nothing has to be
    # trimmed afterwards. Trimming with rstrip(", ") would remove any run of
    # commas and spaces, including ones that belong to the last element, and
    # would drop a trailing empty element altogether.
    body = ", ".join(str(v) for v in data)
    return delims[0] + body + delims[1]
def write_tuple(data):
    """Writes a formatted string from a tuple.

    The format of the output is as for a standard python tuple,
    (tuple[0], tuple[1],..., tuple[n]). Note the space after the commas, and
    the use of brackets.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    # Same layout as a list, only with round brackets around it.
    round_brackets = "()"
    return write_list(data, delims=round_brackets)
def write_float(data):
    """Writes a formatted string from a float.

    Floats are printed out in exponential format, to 8 decimal places,
    right-aligned in a field at least 16 characters wide and padded on the
    left with spaces.

    For example 1.0 --> '  1.00000000e+00'

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    template = "%16.8e"
    return template % (data)
def write_bool(data):
    """Writes a formatted string from a boolean.

    The value is converted with str(), cut to at most 5 characters and
    right-aligned in a field of 5 characters, so booleans are printed as
    either ' True' or 'False'.

    Args:
        data: The value to be written out.

    Returns:
        A formatted string.
    """

    text = str(data)
    return text[:5].rjust(5)
def write_dict(data, delims="{}"):
    """Writes a formatted string from a dictionary.

    The format of the output is as for a standard python dictionary,
    {keyword[0]: arg[0], keyword[1]: arg[1],..., keyword[n]: arg[n]}. Note
    the space after the commas, and the use of curly brackets.

    Args:
        data: The dictionary to be written out. Keys and values are each
            converted with str().
        delims: An optional string of two characters giving the first and
            last character to be printed. Defaults to "{}".

    Returns:
        A formatted string.
    """

    # Joining puts the separator only between items, so nothing has to be
    # trimmed afterwards. Trimming with strip(", ") would remove any run of
    # commas and spaces at either end, including ones that belong to the
    # last value.
    body = ", ".join(str(key) + ": " + str(data[key]) for key in data)
    return delims[0] + body + delims[1]
def _write_str(data):
    """Returns data with leading and trailing whitespace removed.

    Replaces string.strip, which no longer exists on Python 3; calling the
    method on the value itself behaves the same way on Python 2.
    """

    return data.strip()


# Maps each supported value type to the function that formats a value of
# that type as a string. write_type looks its converters up in this table.
writetype_funcs = {
    float: write_float,
    dict: write_dict,
    int: str,
    bool: write_bool,
    str: _write_str,
    tuple: write_tuple,
    np.uint: str,
}
Event Timeline
Log In to Comment