open_msdataset_source.pyx
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Jul 11, 03:16

open_msdataset_source.pyx
View Options

	"""
	Class to represent MITgcm mds file storage format.
	"""
	# python 3 compatibility
	from __future__ import print_function, division

	from glob import glob
	import os
	import re
	import numpy as np
	import warnings
	from io import StringIO
	import inspect
	import xarray as xr
	import dask.array as da

	# we keep the metadata in its own module to keep this one cleaner
	from .variables import dimensions, \
	horizontal_coordinates_spherical, horizontal_coordinates_cartesian, \
	horizontal_coordinates_curvcart, \
	vertical_coordinates, horizontal_grid_variables, vertical_grid_variables, \
	volume_grid_variables, state_variables, aliases
	# would it be better to import mitgcm_variables and then automate the search
	# for variable dictionaries?

	from .utils import parse_meta_file, read_mds, parse_available_diagnostics

	# Number of faces assumed for the 'llc' geometry (the ``llc_method``
	# documentation of ``open_mdsdataset`` counts 13 chunks per vertical level).
	# should we hard code this?
	LLC_NUM_FACES = 13
	# Name of the dimension associated with the LLC faces.
	# NOTE(review): neither constant is referenced in the visible part of this
	# file; presumably they are consumed by the data store -- confirm.
	LLC_FACE_DIMNAME = 'face'

	def open_mdsdataset(data_dir, grid_dir=None,
	                    iters='all', prefix=None, read_grid=True,
	                    delta_t=1, ref_date=None, calendar='gregorian',
	                    geometry='sphericalpolar',
	                    grid_vars_to_coords=True, swap_dims=None,
	                    endian=">", chunks=None,
	                    ignore_unknown_vars=False, default_dtype=None,
	                    nx=None, ny=None, nz=None,
	                    llc_method="smallchunks"):
	    """Open MITgcm-style mds (.data / .meta) file output as xarray dataset.

	    Parameters
	    ----------
	    data_dir : string
	        Path to the directory where the mds .data and .meta files are stored
	    grid_dir : string, optional
	        Path to the directory where the mds .data and .meta files are stored,
	        if different from ``data_dir``.
	    iters : list, optional
	        The iterations numbers of the files to be read. If ``None``, no data
	        files will be read. If ``'all'`` (default), all iterations will be
	        read.
	    prefix : list, optional
	        List of different filename prefixes to read. Default (``None``) is to
	        read all available files.
	    read_grid : bool, optional
	        Whether to read the grid data
	    delta_t : number, optional
	        The timestep used in the model. (Can't be inferred.)
	    ref_date : string, optional
	        An ISO date string corresponding to the zero timestep,
	        e.g. "1990-1-1 0:0:0" (See CF conventions [1]_)
	    calendar : string, optional
	        A calendar allowed by CF conventions [1]_
	    geometry : {'sphericalpolar', 'cartesian', 'llc', 'curvilinear'}
	        MITgcm grid geometry specifier
	    grid_vars_to_coords : boolean, optional
	        Whether to promote grid variables to coordinate status
	    swap_dims : boolean, optional
	        Whether to swap the logical dimensions for physical ones. If ``None``,
	        will be set to ``False`` when ``read_grid`` is false or when
	        ``geometry`` is ``'llc'`` or ``'curvilinear'``, and to ``True``
	        otherwise.
	    endian : {'=', '>', '<'}, optional
	        Endianness of variables. Default for MITgcm is ">" (big endian)
	    chunks : int or dict, optional
	        If chunks is provided, it is used to load the new dataset into dask
	        arrays.
	    ignore_unknown_vars : boolean, optional
	        Don't raise an error if unknown variables are encountered while
	        reading the dataset.
	    default_dtype : numpy.dtype, optional
	        A datatype to fall back on if the metadata can't be read.
	    nx, ny, nz : int, optional
	        The numerical dimensions of the model. These will be inferred from
	        ``XC.meta`` and ``RC.meta`` if they are not specified. If
	        ``geometry==llc``, ``ny`` does not have to be specified.
	    llc_method : {"smallchunks", "bigchunks"}, optional
	        Which routine to use for reading LLC data. "smallchunks" splits the
	        file into an individual dask chunk of size (nx x nx) for each face of
	        each level (i.e. the total number of chunks is 13 * nz). "bigchunks"
	        loads the whole raw data file (either into memory or as a
	        numpy.memmap), splits it into faces, and concatenates those faces
	        together using ``dask.array.concatenate``. The different methods will
	        have different memory and i/o performance depending on the details of
	        the system configuration.

	    Returns
	    -------
	    dset : xarray.Dataset
	        Dataset object containing all coordinates and variables.

	    Raises
	    ------
	    ValueError
	        If ``swap_dims`` is true while ``read_grid`` is false.
	    IOError
	        If several iterations are requested and the file prefixes found at
	        one iteration differ from those expected.

	    References
	    ----------
	    .. [1] http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch04s04.html
	    """

	    # Record the call arguments for the ``history`` attribute. The mapping
	    # returned by ``inspect.getargvalues`` is the frame's own locals (a
	    # write-through proxy on Python >= 3.13 that refuses deletions), so copy
	    # just the declared arguments into a plain dict instead of mutating it.
	    frame = inspect.currentframe()
	    arg_names, _, _, frame_locals = inspect.getargvalues(frame)
	    arg_values = {name: frame_locals[name] for name in arg_names}
	    function_name = inspect.getframeinfo(frame)[2]
	    # do not keep the frame alive longer than needed (avoids a ref cycle)
	    del frame

	    # auto-detect whether to swap dims
	    if swap_dims is None:
	        if not read_grid:
	            swap_dims = False
	        else:
	            swap_dims = geometry not in ('llc', 'curvilinear')

	    # some checks for argument consistency
	    if swap_dims and not read_grid:
	        raise ValueError("If swap_dims==True, read_grid must be True.")

	    # Only compare against 'all' when ``iters`` really is a string; comparing
	    # an array-like with ``==`` would be element-wise and ambiguous.
	    if isinstance(iters, str) and iters == 'all':
	        iters = _get_all_iternums(data_dir, file_prefixes=prefix)

	    # We either have a single iter, in which case we create a fresh store,
	    # or a list of iters, in which case we combine.
	    multiple_iters = False
	    if iters is None:
	        iternum = None
	    else:
	        try:
	            iternum = int(iters)
	        # if not we probably have some kind of list
	        except TypeError:
	            if len(iters) == 1:
	                iternum = int(iters[0])
	            else:
	                multiple_iters = True

	    if multiple_iters:
	        # We have to check to make sure we have the same prefixes at
	        # each timestep...otherwise we can't combine the datasets.
	        first_prefixes = prefix or _get_all_matching_prefixes(
	            data_dir, iters[0])
	        for iternum in iters:
	            these_prefixes = _get_all_matching_prefixes(
	                data_dir, iternum, prefix
	            )
	            # don't care about order
	            if set(these_prefixes) != set(first_prefixes):
	                raise IOError("Could not find the expected file "
	                              "prefixes %s at iternum %g. (Instead "
	                              "found %s)" % (repr(first_prefixes),
	                                             iternum,
	                                             repr(these_prefixes)))

	        # chunk at least by time
	        chunks = chunks or {}

	        # Recursively open one dataset per iteration. Dimension swapping and
	        # coordinate promotion are deferred until after the datasets have
	        # been combined, hence the two hard-coded ``False`` values.
	        kwargs = dict(
	            grid_dir=grid_dir, delta_t=delta_t, swap_dims=False,
	            prefix=prefix, ref_date=ref_date, calendar=calendar,
	            geometry=geometry,
	            grid_vars_to_coords=False,
	            endian=endian, chunks=chunks,
	            ignore_unknown_vars=ignore_unknown_vars,
	            default_dtype=default_dtype,
	            nx=nx, ny=ny, nz=nz, llc_method=llc_method)
	        datasets = [open_mdsdataset(data_dir, iters=single_iter,
	                                    read_grid=False, **kwargs)
	                    for single_iter in iters]
	        # now add the grid (``kwargs`` never holds 'iters' or 'read_grid',
	        # so both can be passed explicitly)
	        if read_grid:
	            datasets.insert(
	                0,
	                open_mdsdataset(data_dir, iters=None, read_grid=True,
	                                **kwargs))
	        # merge the per-iteration datasets (and the grid) into one
	        # NOTE(review): xr.auto_combine is deprecated in newer xarray
	        # releases in favour of combine_by_coords / combine_nested --
	        # confirm which xarray versions must be supported.
	        ds = xr.auto_combine(datasets)
	        if swap_dims:
	            ds = _swap_dimensions(ds, geometry)
	        if grid_vars_to_coords:
	            ds = _set_coords(ds)
	        return ds

	    # single iteration (or grid only): read straight from one data store
	    store = _MDSDataStore(data_dir, grid_dir, iternum, delta_t, read_grid,
	                          prefix, ref_date, calendar,
	                          geometry, endian,
	                          ignore_unknown_vars=ignore_unknown_vars,
	                          default_dtype=default_dtype,
	                          nx=nx, ny=ny, nz=nz, llc_method=llc_method)
	    ds = xr.Dataset.load_store(store)

	    if swap_dims:
	        ds = _swap_dimensions(ds, geometry)
	    if grid_vars_to_coords:
	        ds = _set_coords(ds)

	    # CF decoding is only applied when a reference date was supplied
	    if ref_date:
	        ds = xr.decode_cf(ds)

	    # do we need more fancy logic (like open_dataset), or is this enough
	    if chunks is not None:
	        ds = ds.chunk(chunks)

	    # set attributes for CF conventions
	    ds.attrs['Conventions'] = "CF-1.6"
	    ds.attrs['title'] = "netCDF wrapper of MITgcm MDS binary data"
	    ds.attrs['source'] = "MITgcm"
	    arg_string = ', '.join(['%s=%s' % (str(k), repr(v))
	                            for (k, v) in arg_values.items()])
	    ds.attrs['history'] = ('Created by calling '
	                           '`%s(%s)`' % (function_name, arg_string))

	    return ds

open_msdataset_source.pyxNo OneTemporaryActions

File Metadata

open_msdataset_source.pyxView Options

Event Timeline

open_msdataset_source.pyx
No OneTemporary
Actions

open_msdataset_source.pyx
View Options