#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data model conceptually similar to HDF5 and CDF.
For more documentation, see :doc:`../datamodel`
Authors: Steve Morley and Brian Larsen
Additional Contributors: Charles Kiyanda and Miles Engel
Institution: Los Alamos National Laboratory
Contact: smorley@lanl.gov; balarsen@lanl.gov
Copyright 2010-2016 Los Alamos National Security, LLC.
"""
import collections.abc
import copy
import datetime
import gzip
import io
import itertools
import json
from functools import partial
import os
import re
import warnings
import numpy
# from . import toolbox # handled in functions that use it
__contact__ = 'Steve Morley, smorley@lanl.gov'
str_classes = (str, bytes)
class DMWarning(Warning):
"""
Warnings class for datamodel, subclassed so it can be set to always display
"""
pass
warnings.simplefilter('always', DMWarning)
class ISTPArray:
"""Mixin class for array using ISTP metadata.
Array types like `dmarray` provide all these methods; they assume
attributes of the array use the
`ISTP metadata standard <https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html>`_
and are unlikely to give good results if that is not the case.
Note that some operations that may seem to relate to an array (e.g.
uncertainties) may require the use of other arrays in a container;
these are in `ISTPContainer`.
.. versionadded:: 0.5.0
"""
attrs: collections.abc.Mapping
def replace_invalid(self):
"""Return data from array with invalid values replaced by `~numpy.nan`.
Makes a copy of the data and, for any values equal to the
``FILLVAL`` attribute, greater than ``VALIDMAX``, or less than
``VALIDMIN``, replaces them with NaN.
Returns
-------
`~numpy.ndarray`
Transformed data
See Also
--------
.pycdf.istp.nanfill : an in-place variant
Notes
-----
.. versionadded:: 0.5.0
Comparisons with ``FILLVAL`` are done using `~numpy.isclose` and
so may replace values that are near, but not identical, to fill.
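Examples
--------
A minimal sketch with hypothetical data and attributes:
>>> import spacepy.datamodel as dm
>>> a = dm.dmarray([1., 2., -1e31, 99.],
...                attrs={'FILLVAL': -1e31, 'VALIDMAX': 50.})
>>> a.replace_invalid()
array([ 1.,  2., nan, nan])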
"""
data = numpy.array(self)
idx = numpy.zeros_like(data, dtype=bool)
if self.attrs.get('FILLVAL') is not None:
idx |= numpy.isclose(data, self.attrs['FILLVAL'])
if self.attrs.get('VALIDMIN') is not None:
idx |= data < self.attrs['VALIDMIN']
if self.attrs.get('VALIDMAX') is not None:
idx |= data > self.attrs['VALIDMAX']
data[idx] = numpy.nan
return data
def plot_as_line(self):
"""Determines if this array is better plotted as a lineplot or spectrogram.
Uses array shape and the ``DISPLAY_TYPE`` attribute to determine
if should be plotted as a lineplot (potentially stacked) or spectrogram.
Returns
-------
`bool`
``True`` if should be a lineplot, ``False`` if should be a
spectrogram
Notes
-----
.. versionadded:: 0.5.0
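Examples
--------
A minimal sketch with hypothetical arrays (no ``DISPLAY_TYPE`` set):
>>> import numpy
>>> import spacepy.datamodel as dm
>>> dm.dmarray(numpy.zeros((100, 3))).plot_as_line()
True
>>> dm.dmarray(numpy.zeros((100, 30))).plot_as_line()
False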
"""
if 'DISPLAY_TYPE' in self.attrs:
return self.attrs['DISPLAY_TYPE'] == 'time_series'
dims = len(self.shape)
if dims == 1:
return True
if dims > 2:
return True
# Reasonable dividing line is probably 4 stacked line plots
return self.shape[-1] < 5
def units(self, fmt='minimal'):
"""Finds units of array.
Looks up the unit attribute and performs minor cleanup (including
interpreting IDL codes).
Parameters
----------
fmt : {'minimal', 'latex', 'astropy', 'raw'}
How to format the units: ``minimal`` (default) is a
minimally-processed rendering, ``latex`` is in LaTeX,
``astropy`` is meant to give good results when passed to
`astropy.units.Unit`, and ``raw`` has no processing. No
checks are done on processing for AstroPy or LaTeX, and it
should not be assumed they will parse.
Returns
-------
`str`
Physical units of this array. `None` if not present.
Notes
-----
.. versionadded:: 0.6.0
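Examples
--------
A minimal sketch with a hypothetical IDL-coded unit string:
>>> import spacepy.datamodel as dm
>>> a = dm.dmarray([1., 2.], attrs={'UNITS': 'keV/(cm!E2!N s sr keV)'})
>>> a.units()
'keV/(cm^2 s sr keV)'
>>> a.units(fmt='latex')
'keV/(cm^{2} s sr keV)'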
"""
u = self.attrs.get('UNITS', None)
if fmt == 'raw' or u is None:
return u
if fmt == 'astropy':
u = u.replace('#', '1')
elif fmt == 'latex':
u = u.replace('#', '\\#')
if fmt in ('minimal', 'astropy'):
u = re.sub(r'![EU]([^!]*)!N', r'^\1', u) # IDL to exponent
u = re.sub(r'\^{([^!]*)}', r'^\1', u) # LaTeX to exponent
if fmt == 'minimal':
u = re.sub(r'(?<=\d)(?=[\w^_])', r' ', u) # Insert spaces
if fmt == 'astropy':
# Common substitutions
for orig, ap in (('ster', 'sr'),
('cc', 'cm^3'),
):
u = re.sub(fr'((?<=[\W\d])|^){orig}(?=[\W\d]|$)', ap, u)
if fmt == 'latex':
u = re.sub(r'![EU]([^!]*)!N', r'^{\1}', u) # IDL to exponent
return u
def toQuantity(self, copy=True):
"""Convert to Astropy Quantity
Converts this array to an Astropy `~astropy.units.Quantity`.
Invalid values are replaced with `~numpy.nan`.
Returns
-------
`~astropy.units.Quantity`
Data from this array interpreted according to its ``UNITS``
attribute.
Other Parameters
----------------
copy : `bool`, default ``True``
Copy data to the Quantity. If ``False``, changes to the
Quantity may affect the source array. In some cases a copy
may be made even if ``False``.
Notes
-----
.. versionadded:: 0.6.0
Examples
--------
>>> import spacepy.datamodel
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
>>> data = spacepy.datamodel.fromCDF(
... 'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
>>> q = data['Position'].toQuantity()
>>> q.to('m')
<Quantity [[-32833200. , -15531762. , -6449212. ],
[-32903586. , -15406271. , -6448704.5],
[-32967848. , -15277711. , -6446542.5],
...,
[-20966128. , 6941849.5, -2896334.2],
[-21515586. , 6858618. , -3026324. ],
[-22047328. , 6783260.5, -3153003.5]] m>
"""
import astropy.units
data = self.replace_invalid() # makes copy
if not numpy.isnan(data).any() and not copy:
data = self[...]
q = astropy.units.Quantity(data, self.units(fmt='astropy'), copy=False)
return q
@classmethod
def fromQuantity(cls, q, copy=True):
"""Convert from Astropy Quantity
Converts an Astropy `~astropy.units.Quantity` to an ISTP array.
`~numpy.nan` are replaced with fill.
Parameters
----------
q : `~astropy.units.Quantity`
Quantity to convert
Returns
-------
`ISTPArray`
Array with attributes which can be inferred from input.
This may not be fully ISTP-compliant.
Other Parameters
----------------
copy : `bool`, default ``True``
Copy data from the Quantity. If ``False``, changes to the
Quantity may affect this array. In some cases a copy
may be made even if ``False``.
Notes
-----
.. versionadded:: 0.6.0
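Examples
--------
A minimal sketch with a hypothetical Quantity; NaN becomes fill:
>>> import astropy.units
>>> import numpy
>>> import spacepy.datamodel as dm
>>> q = astropy.units.Quantity([1., numpy.nan], 'km')
>>> a = dm.dmarray.fromQuantity(q)
>>> a.attrs['UNITS']
'km'
>>> float(a[-1])
-1e+31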
"""
import astropy.units
fill = numpy.isnan(q.value)
if fill.any():
copy = True
data = q.value.copy() if copy else q.value
data[fill] = -1e31 # Quantities are always float
s = q.unit.si
# Force scientific notation, remove superfluous signs and zeros
scale = re.sub(r'0*e\+?0*', 'e', f'{s.scale:#e}', count=1)
# Unscaled formatter is deprecated, so strip the scale the hard way
unscaled = (s / astropy.units.Unit(s.scale)).to_string()
# and remove extra spaces around operators
unscaled = re.sub(r'\s+([^\w])\s+', r'\1', unscaled)
attrs = {
'FILLVAL': -1e31,
'SI_Conversion': f'{scale}>{unscaled}',
'UNITS': q.unit.to_string(),
}
out = cls(data, attrs)
return out
class ISTPContainer(collections.abc.Mapping):
"""Mixin class for containers using ISTP metadata.
Container types like `SpaceData` provide all these methods; they assume
attributes of the container and the arrays it contains use the
`ISTP metadata standard <https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html>`_
and are unlikely to give good results if that is not the case.
.. versionadded:: 0.5.0
"""
attrs: collections.abc.Mapping
def lineplot(self, vname, target=None):
"""Line plot of a value (array) from this container
Parameters
----------
vname : `str`
The key into this container of the value to plot (i.e.,
the name of the variable).
target : `matplotlib.axes.Axes` or `matplotlib.figure.Figure`, optional
Where to draw the plot. Default is to create a new figure with
a single subplot. If ``Axes``, will draw into that subplot (and
will not draw a legend or figure title); if ``Figure``, will
make a single subplot (and not set figure title). Handled by
`~.plot.utils.set_target`.
Returns
-------
ax : `matplotlib.axes.Axes`
The subplot on which the variable was plotted
Notes
-----
.. versionadded:: 0.5.0
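Examples
--------
A sketch using the same hypothetical data file as the `plot` examples:
>>> import spacepy.datamodel
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
>>> data = spacepy.datamodel.fromCDF(
...     'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
>>> ax = data.lineplot('Position')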
"""
import spacepy.plot.utils
v = self[vname]
fig, ax = spacepy.plot.utils.set_target(target)
x = self[v.attrs['DEPEND_0']]
data = v.replace_invalid()
labels = None
if v.attrs.get('LABL_PTR_1'):
labels = self[v.attrs['LABL_PTR_1']]
deltas = self.get_deltas(vname)
plot_kwargs = {}
if len(data.shape) == 1:
data = data[..., None]
if deltas and len(deltas[0].shape) == 1:
deltas = tuple([d[..., None] for d in deltas])
for dim in range(data.shape[-1]):
if labels is not None:
plot_kwargs['label'] = labels[dim]
if deltas:
if len(deltas) == 1:
yerr = deltas[0][:, dim]
else:
yerr = numpy.stack((deltas[0][:, dim], deltas[1][:, dim]))
ax.errorbar(numpy.array(x), data[:, dim], yerr=yerr, **plot_kwargs)
else:
ax.plot(numpy.array(x), data[:, dim], **plot_kwargs)
ylabel = v.attrs.get('LABLAXIS', '')
u = v.units(fmt='latex')
if u.strip():
ylabel = '{}{}(${}$)'.format(ylabel, ' ' if ylabel else '', u)
if ylabel:
ax.set_ylabel(ylabel)
if x.attrs.get('LABLAXIS'):
ax.set_xlabel(x.attrs['LABLAXIS'])
if labels is not None and target is not ax:
ax.legend(loc='best')
if target is None and v.attrs.get('CATDESC'):
fig.suptitle(v.attrs['CATDESC'])
spacepy.plot.utils.applySmartTimeTicks(ax, x)
return ax
def main_vars(self):
"""Return names of the 'main' variables in this container.
These are variables that are likely to be of direct interest, rather
than dependencies and support data. They are chosen primarily by
not being dependencies of other variables, but if the ``VAR_TYPE``
attribute is present it must be ``data``.
Returns
-------
`list` of `str`
Notes
-----
.. versionadded:: 0.5.0
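Examples
--------
A minimal sketch; ``Epoch`` is a dependency of ``flux``, so only
``flux`` is 'main':
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData()
>>> data['Epoch'] = dm.dmarray([1., 2.])
>>> data['flux'] = dm.dmarray([5., 6.], attrs={'DEPEND_0': 'Epoch'})
>>> data.main_vars()
['flux']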
"""
referenced = set()
for k, v in self.items():
referenced.update([v.attrs[a] for a in v.attrs
if a.startswith(('DEPEND_', 'LABL_PTR_', 'DELTA_'))])
main = sorted(set(self).difference(referenced))
if any(('VAR_TYPE' in v.attrs for v in self.values())):
main = [m for m in main if self[m].attrs.get('VAR_TYPE', '') == 'data']
return main
def plot(self, vnames=None, fig=None):
"""Plot one or more values (arrays) from this container
Parameters
----------
vnames : `list` of `str`, optional.
The key into this container of the value(s) to plot (i.e.,
the name of the variable). If not specified, plots all
'main' variables which are not dependencies of others;
see `main_vars`.
fig : `matplotlib.figure.Figure`, optional
Where to draw the plot. Default is to create a new figure. If
given, subplots will be added to this figure (it should start
empty).
Returns
-------
fig : `matplotlib.figure.Figure`
The figure on which the variables were plotted
See Also
--------
lineplot : to line plot a single variable
spectrogram : to make a spectrogram of a single variable
Notes
-----
.. versionadded:: 0.5.0
Examples
--------
>>> import spacepy.datamodel
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
>>> data = spacepy.datamodel.fromCDF(
... 'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
>>> fig = data.plot(['FESA', 'Position'])
>>> fig.show() # if needed
>>> import spacepy.pycdf
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/hope/level2/spinaverage/
>>> with spacepy.pycdf.CDF('rbspa_rel04_ect-hope-sci-L2SA_20140108_v6.1.0.cdf') as f:
... data = f.copy()
>>> fig = data.plot(['FESA', 'FPSA'])
>>> fig.show() # if needed
>>> import spacepy.pycdf
# https://spp-isois.sr.unh.edu/data_public/ISOIS/level2/
>>> with spacepy.pycdf.CDF('psp_isois_l2-summary_20201130_v13.cdf') as f:
... data = f.copy()
>>> fig = data.plot(['A_H_Rate_TS', 'H_CountRate_ChanP_SP'])
>>> fig.show() # if needed
"""
if fig is None:
import matplotlib.pyplot
fig = matplotlib.pyplot.figure()
if isinstance(vnames, collections.abc.Hashable) and vnames in self:
vnames = [vnames]
if vnames is None:
vnames = self.main_vars()
n_plots = len(vnames)
for i, k in enumerate(vnames):
ax = fig.add_subplot(n_plots, 1, i + 1)
if self[k].plot_as_line():
self.lineplot(k, target=ax)
h, l = ax.get_legend_handles_labels()
if l:
ax.legend(h, l, loc='best')
else:
self.spectrogram(k, target=ax)
return fig
def spectrogram(self, vname, target=None):
"""Spectrogram plot of a value (array) from this container
Parameters
----------
vname : `str`
The key into this container of the value to plot (i.e.,
the name of the variable).
target : `matplotlib.axes.Axes` or `matplotlib.figure.Figure`, optional
Where to draw the plot. Default is to create a new figure with
a single subplot. If ``Axes``, will draw into that subplot (and
will not set figure title); if ``Figure``, will make a single
subplot (and not set figure title). Handled by
`~.plot.utils.set_target`.
Returns
-------
ax : `matplotlib.axes.Axes`
The subplot on which the variable was plotted
Notes
-----
.. versionadded:: 0.5.0
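Examples
--------
A sketch using the same hypothetical data file as the `plot` examples
(``FESA`` is assumed there to be a 2D flux variable):
>>> import spacepy.datamodel
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
>>> data = spacepy.datamodel.fromCDF(
...     'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
>>> ax = data.spectrogram('FESA')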
"""
import matplotlib.cm
import spacepy.plot.utils
v = self[vname]
fig, ax = spacepy.plot.utils.set_target(target)
x = self[v.attrs['DEPEND_0']]
data = v.replace_invalid()
y = self[v.attrs['DEPEND_1']]
zlabel = v.attrs.get('LABLAXIS', '')
u = v.units(fmt='latex')
if u.strip():
zlabel = '{}{}(${}$)'.format(
zlabel, ' ' if zlabel else '', u)
zlabel = zlabel if zlabel else None
try: # mpl >=3.7
cmap = matplotlib.colormaps.get_cmap(None)
except AttributeError:
cmap = matplotlib.cm.get_cmap()
cmap = copy.copy(cmap)
if cmap(-1.)[:3] == cmap(0.)[:3]: # Underflow to black if not specified
cmap.set_under('k')
# Fill to grey or white
if cmap(numpy.nan)[:3] == cmap(0.)[:3] and cmap(numpy.nan)[-1] > 0.:
cmap.set_bad((.5, .5, .5, 0.) if cmap(1.)[:3] == (1., 1., 1.)
else (1., 1., 1., 0.))
ax = spacepy.plot.simpleSpectrogram(numpy.array(x), numpy.array(y), data, cbtitle=zlabel,
ax=ax, zero_valid=True, cmap=cmap)
ylabel = y.attrs.get('LABLAXIS', '')
u = y.units(fmt='latex')
if u.strip():
ylabel = '{}{}(${}$)'.format(
ylabel, ' ' if ylabel else '', u)
if ylabel:
ax.set_ylabel(ylabel)
if x.attrs.get('LABLAXIS'):
ax.set_xlabel(x.attrs['LABLAXIS'])
if target is None and v.attrs.get('CATDESC'):
fig.suptitle(v.attrs['CATDESC'])
spacepy.plot.utils.applySmartTimeTicks(ax, x)
return ax
def get_deltas(self, vname):
"""Return deltas for an array
Returns ISTP delta values. These may be uncertainties or may
be e.g. bin widths; interpretation is undefined.
Invalid values are replaced with `~numpy.nan`.
Parameters
----------
vname : `str`
The key into this container of the value to get delta
(i.e., the name of the variable).
Returns
-------
deltas : `tuple` of `~numpy.ndarray`
Deltas for ``vname``. Empty if no deltas available;
one-element if symmetric; two-element if not symmetric.
Notes
-----
.. versionadded:: 0.5.0
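Examples
--------
A minimal sketch with a hypothetical symmetric uncertainty:
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData()
>>> data['flux'] = dm.dmarray(
...     [10., 20.], attrs={'DELTA_PLUS_VAR': 'err',
...                        'DELTA_MINUS_VAR': 'err'})
>>> data['err'] = dm.dmarray([1., 2.])
>>> data.get_deltas('flux')
(array([1., 2.]),)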
"""
v = self[vname]
asymmetric_msg = 'Only one of DELTA_(MINUS|PLUS)_VAR specified.'
if 'DELTA_PLUS_VAR' not in v.attrs:
if 'DELTA_MINUS_VAR' in v.attrs:
raise ValueError(asymmetric_msg)
return ()
elif 'DELTA_MINUS_VAR' not in v.attrs:
raise ValueError(asymmetric_msg)
dp = self[v.attrs['DELTA_PLUS_VAR']].replace_invalid()
if v.attrs['DELTA_PLUS_VAR'] == v.attrs['DELTA_MINUS_VAR']:
return (dp,)
return (self[v.attrs['DELTA_MINUS_VAR']].replace_invalid(), dp)
def toDataFrame(self, vname=None, copy=True):
"""Convert to Pandas DataFrame
Converts one variable (and its dependencies) to a Pandas
`~pandas.DataFrame`. Invalid values are replaced with `~numpy.nan`.
Parameters
----------
vname : `str`, optional
The key into this container of the value to convert
(i.e., the name of the variable). Strongly recommended;
if not specified, will try to find one using `main_vars`,
and raise `ValueError` if there is more than one candidate.
Returns
-------
`~pandas.DataFrame`
Data from the array named by ``vname`` and its dependencies.
Other Parameters
----------------
copy : `bool`, default ``True``
Copy data to the DataFrame. If ``False``, changes to the
DataFrame may affect the source data. In some cases a copy
may be made even if ``False``.
Notes
-----
.. versionadded:: 0.6.0
Examples
--------
>>> import spacepy.datamodel
# https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
>>> data = spacepy.datamodel.fromCDF(
... 'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
>>> df = data.toDataFrame('Position')
>>> df.plot()
"""
import pandas
if vname is None:
main_vars = self.main_vars()
if len(main_vars) != 1:
matches = ', '.join(main_vars) if main_vars else 'none'
raise ValueError(
f'No variable specified; possible matches: {matches}.')
vname = main_vars[0]
a = self[vname].attrs
data = self[vname].replace_invalid() # makes copy
if not numpy.isnan(data).any() and not copy:
data = self[vname][...]
if 'LABL_PTR_1' in a:
columns = self[a['LABL_PTR_1']][...]
else:
columns = [a.get('FIELDNAM', vname)]
if len(data.shape) > 1:
columns *= data.shape[-1]
df = pandas.DataFrame(
data=data, index=self[a['DEPEND_0']][...],
columns=columns, copy=False)
return df
@classmethod
def fromDataFrame(cls, df, copy=True):
"""Convert from Pandas DataFrame
Converts a Pandas `~pandas.DataFrame` to ISTP-compliant type.
`~numpy.nan` are replaced with fill.
Parameters
----------
df : `~pandas.DataFrame`
Data frame to convert
Returns
-------
`ISTPContainer`
ISTP-compliant container representing the dataframe's data.
This may not be fully ISTP-compliant; the minimum attributes
required to represent the DataFrame are used.
Other Parameters
----------------
copy : `bool`, default ``True``
Copy data from the DataFrame. If ``False``, changes to the
output may affect the DataFrame. In some cases a copy
may be made even if ``False``.
Notes
-----
.. versionadded:: 0.6.0
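Examples
--------
A minimal sketch converting a hypothetical time-indexed DataFrame:
>>> import datetime
>>> import pandas
>>> import spacepy.datamodel as dm
>>> df = pandas.DataFrame(
...     {'a': [1., 2.]},
...     index=[datetime.datetime(2010, 1, 1),
...            datetime.datetime(2010, 1, 2)])
>>> data = dm.SpaceData.fromDataFrame(df)
>>> sorted(data.keys())
['ColumnNumbers', 'Epoch', 'Labels', 'data']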
"""
output = cls()
# assume can hold a dmarray, a CDF will convert it
fill = numpy.isnan(df.values)
if fill.any():
copy = True
data = df.values.copy() if copy else df.values
vartype = {
'f': 'float',
'i': 'int',
'U': 'char',
}.get(data.dtype.kind, 'float')
attrs = createISTPattrs('data', ndims=2, vartype=vartype)
attrs.update({
'DEPEND_1': 'ColumnNumbers',
'DISPLAY_TYPE': 'time_series',
'FIELDNAM': 'data',
'LABL_PTR_1': 'Labels',
})
for k in ('CATDESC', 'LABLAXIS', 'SI_CONVERSION', 'UNITS',
'VALIDMIN', 'VALIDMAX'):
del attrs[k]
output['data'] = dmarray(data, attrs=attrs)
output['data'][fill] = -1e31
attrs = createISTPattrs('support_data', vartype='tt2000')
attrs.update({
'FIELDNAM': 'Epoch',
})
del attrs['CATDESC']
output['Epoch'] = dmarray(df.index.to_pydatetime(), attrs=attrs)
attrs = createISTPattrs('metadata', vartype='char', NRV=True)
attrs.update({
'FIELDNAM': 'Labels',
})
del attrs['CATDESC']
output['Labels'] = dmarray(df.columns.values, attrs=attrs, dtype='U')
attrs = createISTPattrs('support_data', vartype='int', NRV=True)
attrs.update({
'FIELDNAM': 'ColumnNumbers',
})
del attrs['CATDESC']
output['ColumnNumbers'] = dmarray(numpy.arange(len(df.columns)),
attrs=attrs)
return output
class dmarray(numpy.ndarray, MetaMixin, ISTPArray):
"""
Container for data within a SpaceData object
Although the format of attributes is not enforced, using ISTP metadata
enables the use of methods from `ISTPArray`.
.. versionchanged:: 0.7.0
Operations on a ``dmarray`` which return a scalar value return a numpy
:std:term:`array scalar`. Previously they would return the base
Python type. Assignment to ``dtype`` and ``shape`` are also now
supported; previously this raised :exc:`TypeError`.
Raises
------
NameError
raised if the requested name was not added to the allowed attributes list
Examples
--------
>>> import spacepy.datamodel as datamodel
>>> position = datamodel.dmarray([1,2,3], attrs={'coord_system':'GSM'})
>>> position
dmarray([1, 2, 3])
>>> position.attrs
{'coord_system': 'GSM'}
The dmarray, like a numpy ndarray, is versatile and can store
any datatype; dmarrays are not just for arrays.
>>> name = datamodel.dmarray('TestName')
>>> name
dmarray('TestName')
To extract the string (or scalar quantity), use the tolist method
>>> name.tolist()
'TestName'
See methods of `ISTPArray` if attributes are ISTP-compliant.
"""
Allowed_Attributes = ['attrs']
def __new__(cls, input_array, attrs=None, dtype=None):
# Input array is an already formed ndarray instance
# We first cast to be our class type
if not dtype:
obj = numpy.asarray(input_array).view(cls)
else:
obj = numpy.asarray(input_array).view(cls).astype(dtype)
# add the new attribute to the created instance
if attrs is not None:
obj.attrs = attrs
else:
obj.attrs = {}
# Finally, return the newly created object:
return obj
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None:
return
for val in self.Allowed_Attributes:
self.__setattr__(val, copy.deepcopy(getattr(obj, val, {})))
def __array_wrap__(self, out_arr, context=None, return_scalar=None):
# Check for zero dims (numpy bug: subclass behaviour isn't consistent with ndarray);
# this traps most of the bad behaviour (std() and var() are still problems)
if return_scalar:
return super().__array_wrap__(out_arr, context, True)[()]
if return_scalar is False:
return super().__array_wrap__(out_arr, context, False)
# Pre numpy 2.0, so guess if scalar
if out_arr.ndim:
return super().__array_wrap__(out_arr, context)
return super().__array_wrap__(out_arr, context)[()]
def __reduce__(self):
"""This is called when pickling, see:
http://www.mail-archive.com/numpy-discussion@scipy.org/msg02446.html
for this particular example.
Only the attributes in Allowed_Attributes can exist
"""
object_state = list(numpy.ndarray.__reduce__(self))
subclass_state = tuple([tuple([val, self.__getattribute__(val)]) for val in self.Allowed_Attributes])
object_state[2] = (object_state[2], subclass_state)
return tuple(object_state)
def __setstate__(self, state):
"""Used for unpickling after __reduce__ the self.attrs is recovered from
the way it was saved and reset.
"""
nd_state, own_state = state
numpy.ndarray.__setstate__(self, nd_state)
for i, val in enumerate(own_state):
if val[0] not in self.Allowed_Attributes: # this is attrs
self.Allowed_Attributes.append(own_state[i][0])
self.__setattr__(own_state[i][0], own_state[i][1])
def __setattr__(self, name, value):
"""Make sure that .attrs is the only attribute that we are allowing
Timings over 10000000 calls to __setattr__:
dmarray_ne took 15.324803 s
dmarray_eq took 15.665865 s
dmarray_assert took 16.025478 s
It looks like != is the fastest, but not by much.
"""
# Several attrs special-cased because should NOT be pickled
if name in ('Allowed_Attributes', 'dtype', 'meta', 'shape'):
pass
elif name not in self.Allowed_Attributes:
raise TypeError("Only attributes listed in Allowed_Attributes can be set")
super(dmarray, self).__setattr__(name, value)
def addAttribute(self, name, value=None):
"""Method to add an attribute to a dmarray
equivalent to
a = datamodel.dmarray([1,2,3])
a.Allowed_Attributes = a.Allowed_Attributes + ['blabla']
"""
if name in self.Allowed_Attributes:
raise NameError('{0} is already an attribute, cannot add again'.format(name))
self.Allowed_Attributes.append(name)
self.__setattr__(name, value)
def count(self, srchval):
"""
Equivalent to count method on list
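For example, counting occurrences of a value (hypothetical data):
>>> import spacepy.datamodel as dm
>>> dm.dmarray([1, 2, 1, 3]).count(1)
2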
"""
mask = self == srchval
return int(mask.sum())
def _saveAttrs(self):
Allowed_Attributes = self.Allowed_Attributes
backup = []
for atr in Allowed_Attributes:
backup.append((atr, dmcopy(self.__getattribute__(atr))))
return backup
@classmethod
def _replaceAttrs(cls, arr, backup):
for key, val in backup:
if key != 'attrs':
try:
arr.addAttribute(key)
except NameError:
pass
arr.__setattr__(key, val)
return arr
@classmethod
def append(cls, one, other):
"""
append data to an existing dmarray
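A minimal sketch with hypothetical data; attributes of ``one`` carry
over to the result:
>>> import spacepy.datamodel as dm
>>> a = dm.dmarray([1, 2], attrs={'units': 'nT'})
>>> dm.dmarray.append(a, [3, 4])
dmarray([1, 2, 3, 4])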
"""
backup = one._saveAttrs()
outarr = dmarray(numpy.append(one, other))
return cls._replaceAttrs(outarr, backup)
@classmethod
def vstack(cls, one, other):
"""
vstack data to an existing dmarray
"""
backup = one._saveAttrs()
outarr = dmarray(numpy.vstack((one, other)))
return cls._replaceAttrs(outarr, backup)
@classmethod
def hstack(cls, one, other):
"""
hstack data to an existing dmarray
"""
backup = one._saveAttrs()
outarr = dmarray(numpy.hstack((one, other)))
return cls._replaceAttrs(outarr, backup)
@classmethod
def dstack(cls, one, other):
"""
dstack data to an existing dmarray
"""
backup = one._saveAttrs()
outarr = dmarray(numpy.dstack((one, other)))
return cls._replaceAttrs(outarr, backup)
@classmethod
def concatenate(cls, one, other, axis=0):
"""
concatenate data to an existing dmarray
"""
backup = one._saveAttrs()
outarr = dmarray(numpy.concatenate((one, other), axis=axis))
return cls._replaceAttrs(outarr, backup)
def dmfilled(shape, fillval=0, dtype=None, order='C', attrs=None):
"""
Return a new dmarray of given shape and type, filled with a specified value (default=0).
See Also
--------
numpy.ones
Examples
--------
>>> import numpy as np
>>> import spacepy.datamodel as dm
>>> dm.dmfilled(5, attrs={'units': 'nT'})
dmarray([ 0., 0., 0., 0., 0.])
>>> dm.dmfilled((5,), fillval=1, dtype=int)
dmarray([1, 1, 1, 1, 1])
>>> dm.dmfilled((2, 1), fillval=np.nan)
dmarray([[ nan],
[ nan]])
>>> a = dm.dmfilled((2, 1), np.nan, attrs={'units': 'nT'})
>>> a
dmarray([[ nan],
[ nan]])
>>> a.attrs
{'units': 'nT'}
"""
a = dmarray(numpy.empty(shape, dtype, order), attrs=attrs)
a.fill(fillval)
return a
class SpaceData(dict, MetaMixin, ISTPContainer):
"""
Datamodel class extending dict by adding attributes.
Although the format of attributes is not enforced, using ISTP metadata
enables the use of methods from `ISTPContainer`.
"""
def __getitem__(self, key):
"""
This allows one to index a SpaceData with an iterable of keys,
returning a new SpaceData made from that subset of keys
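For example, extracting a hypothetical subset of keys:
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData(a=1, b=2, c=3)
>>> sorted(data[['a', 'c']].keys())
['a', 'c']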
"""
try:
return super(SpaceData, self).__getitem__(key)
except (KeyError, TypeError):
if isinstance(key, (tuple, list)):
# make a new SpaceData from these keys
out = SpaceData()
out.attrs = self.attrs
for k in key:
out[k] = self[k]
return out
else:
raise KeyError('{0}'.format(key))
def __init__(self, *args, **kwargs):
"""
Base class for "Data Model" representation data
Abstract method, reimplement
Attributes
----------
attrs : dict
dictionary of the attributes of the SpaceData object
"""
#raise(ValueError("Abstract method called, reimplement __init__"))
self.attrs = {}
if 'attrs' in kwargs:
if hasattr(kwargs['attrs'], '__getitem__'):
self.attrs = kwargs['attrs']
del kwargs['attrs']
super(SpaceData, self).__init__(*args, **kwargs)
self.toCDF = partial(toCDF, SDobject=self)
self.toCDF.__doc__ = toCDF.__doc__
self.toHDF5 = partial(toHDF5, SDobject=self)
self.toHDF5.__doc__ = toHDF5.__doc__
self.toJSONheadedASCII = partial(toJSONheadedASCII, insd=self)
self.toJSONheadedASCII.__doc__ = toJSONheadedASCII.__doc__
## To enable string output of repr, instead of just printing, uncomment this block
# def __repr__(self):
# #redirect stdout to StringIO
# import io, sys
# dum = io.StringIO()
# sys_stdout_save = sys.stdout
# sys.stdout = dum
# self.tree(verbose=True)
# sys.stdout = sys_stdout_save
# dum.seek(0)
# return ''.join(dum.readlines())
def tree(self, **kwargs):
'''Print the contents of the SpaceData object in a visual tree
Other Parameters
----------------
verbose : bool, default False
print more info
spaces : str (optional)
string to be added for every line
levels : int (optional)
number of levels to recurse through (True, the default, means all)
attrs : bool, default False
display information for attributes
print_out : bool, default True
.. versionadded:: 0.5.0
Print output (original behavior); if ``False``, return the output.
Examples
--------
>>> import spacepy.datamodel as dm
>>> import spacepy.toolbox as tb
>>> a = dm.SpaceData()
>>> a['1'] = dm.SpaceData(dog = 5)
>>> a['4'] = dm.SpaceData(cat = 'kitty')
>>> a['5'] = 4
>>> a.tree()
+
|____1
     |____dog
|____4
     |____cat
|____5
See Also
--------
:class:`~spacepy.toolbox.dictree`
'''
from . import toolbox
return toolbox.dictree(self, **kwargs)
def flatten(self):
'''
Method to collapse datamodel to one level deep
Examples
--------
>>> import spacepy.datamodel as dm
>>> import spacepy.toolbox as tb
>>> a = dm.SpaceData()
>>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
>>> a['4'] = dm.SpaceData(cat = 'kitty')
>>> a['5'] = 4
>>> a.tree()
+
|____1
     |____dog
     |____pig
          |____fish
               |____a
               |____b
|____4
     |____cat
|____5
>>> b = dm.flatten(a)
>>> b.tree()
+
|____1<--dog
|____1<--pig<--fish<--a
|____1<--pig<--fish<--b
|____4<--cat
|____5
>>> a.flatten()
>>> a.tree()
+
|____1<--dog
|____1<--pig<--fish<--a
|____1<--pig<--fish<--b
|____4<--cat
|____5
'''
flatobj = flatten(self)
remkeys = [key for key in self]
for key in remkeys:
del self[key]
for key in flatobj:
self[key] = copy.copy(flatobj[key])
# Stubs of partialed-in functions for docs; actual versions populated
# when class instantiated
def toCDF(fname, **kwargs):
"""Create CDF file from this SpaceData.
See `toCDF`; this object is provided for ``SDobject``."""
def toHDF5(fname, **kwargs):
"""Create HDF5 file from this SpaceData.
See `toHDF5`; this object is provided for ``SDobject``."""
def toJSONheadedASCII(fname, **kwargs):
"""Create JSON-headed ASCII file from this SpaceData.
See `toJSONheadedASCII`; this object is provided for ``insd``."""
def convertKeysToStr(SDobject):
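"""Convert all non-string keys of a (possibly nested) dict-like to str
Recurses through nested dict-likes and returns a new object of the
same top-level type; SpaceData attributes are preserved and values
are not copied.
"""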
if isinstance(SDobject, SpaceData):
newSDobject = SpaceData()
newSDobject.attrs = SDobject.attrs
else:
newSDobject = {}
for key in SDobject:
if not isinstance(key, str_classes):
if isinstance(SDobject[key], dict):
newSDobject[str(key)] = convertKeysToStr(SDobject[key])
else:
newSDobject[str(key)] = SDobject[key]
else:
if isinstance(SDobject[key], dict):
newSDobject[key] = convertKeysToStr(SDobject[key])
else:
newSDobject[key] = SDobject[key]
return newSDobject
def flatten(dobj):
'''Collapse datamodel to one level deep
Examples
--------
>>> import spacepy.datamodel as dm
>>> import spacepy.toolbox as tb
>>> a = dm.SpaceData()
>>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
>>> a['4'] = dm.SpaceData(cat = 'kitty')
>>> a['5'] = 4
>>> a.tree()
+
|____1
     |____dog
     |____pig
          |____fish
               |____a
               |____b
|____4
     |____cat
|____5
>>> b = dm.flatten(a)
>>> b.tree()
+
|____1<--dog
|____1<--pig<--fish<--a
|____1<--pig<--fish<--b
|____4<--cat
|____5
>>> a.flatten()
>>> a.tree()
+
|____1<--dog
|____1<--pig<--fish<--a
|____1<--pig<--fish<--b
|____4<--cat
|____5
See Also
--------
unflatten
SpaceData.flatten
'''
try:
addme = dobj.__class__()
except TypeError:
addme = SpaceData()
remlist = []
for key in dobj: #iterate over keys in SpaceData
if isinstance(dobj[key], dict):
remlist.append(key)
newname = str(key) + '<--'
for levkey in dobj[key]:
if hasattr(dobj[key][levkey], 'keys'):
retdict = flatten(dobj[key][levkey])
for key2 in retdict:
addme[newname+levkey+'<--'+key2] = retdict[key2]
else:
addme[newname+levkey] = copy.copy(dobj[key][levkey])
else:
addme[key] = copy.copy(dobj[key])
return addme
def unflatten(dobj, marker='<--'):
'''Collapse datamodel to one level deep
Examples
--------
>>> import spacepy.datamodel as dm
>>> import spacepy.toolbox as tb
>>> a = dm.SpaceData()
>>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
>>> a['4'] = dm.SpaceData(cat = 'kitty')
>>> a['5'] = 4
>>> a.tree()
+
|____1
     |____dog
     |____pig
          |____fish
               |____a
               |____b
|____4
     |____cat
|____5
>>> b = dm.flatten(a)
>>> b.tree()
+
|____1<--dog
|____1<--pig<--fish<--a
|____1<--pig<--fish<--b
|____4<--cat
|____5
>>> c = dm.unflatten(b)
>>> c.tree()
+
|____1
     |____dog
     |____pig
          |____fish
               |____a
               |____b
|____4
     |____cat
|____5
'''
#set up a new object for return
try:
addme = dobj.__class__()
except TypeError:
addme = SpaceData()
#the input is assumed to be single level (i.e. it is flat)
#find all keys that have at least one marker,
#then unpack. Recurse over these until no more markers are found.
keydict = {}
for key in dobj:
if isinstance(dobj[key], dict):
raise TypeError('Flat datamodel should not contain dict-likes')
try:
if marker in key:
#get 'group'
group = key.split(marker)[0]
if not group in keydict:
keydict[group] = {key: ''}
else:
keydict[group][key] = ''
else: #not nested, just copy key
addme[key] = dmcopy(dobj[key])
except:
addme[key] = dmcopy(dobj[key])
#now we have all the groups at this level
#move members of groups into new SpaceDatas
for grp in keydict:
addme[grp] = SpaceData()
for key in keydict[grp]:
newkey = marker.join(key.split(marker)[1:])
addme[grp][newkey] = dmcopy(dobj[key])
# recurse to make sure everything inside is unpacked
addme[grp] = unflatten(addme[grp], marker=marker)
return addme
def fromCDF(fname):
'''
Create a SpacePy datamodel representation of a NASA CDF file
Parameters
----------
fname : str
the name of the CDF file to be loaded into a datamodel
Returns
-------
out : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Examples
--------
>>> import spacepy.datamodel as dm
>>> data = dm.fromCDF('test.cdf')
See Also
--------
.pycdf.CDF.copy
.pycdf.istp.VarBundle
'''
#TODO: add unflatten keyword and restore flattened variables
try:
from spacepy import pycdf
except ImportError:
raise ImportError("CDF converter requires NASA CDF library and SpacePy's pyCDF")
with pycdf.CDF(fname) as cdfdata:
return cdfdata.copy()
def toCDF(fname, SDobject, skeleton='', flatten=False, overwrite=False,
autoNRV=False, backward=None, TT2000=None, verbose=False):
'''
Create a CDF file from a SpacePy datamodel representation
Parameters
----------
fname : str
Filename to write to
SDobject : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Other Parameters
----------------
skeleton : str (optional)
create new CDF from a skeleton file (default '')
flatten : bool (optional)
flatten incoming datamodel - if SpaceData objects are nested (default False)
overwrite : bool (optional)
allow overwrite of an existing target file (default False)
autoNRV : bool (optional)
attempt automatic identification of non-record varying entries in CDF
backward : bool (optional)
``False`` to create CDF in backward-compatible format; ``True``
to force v3+ compatibility only. (Default: do not change current
state, see :meth:`~.pycdf.Library.set_backward`).
.. versionchanged:: 0.5.0
Now supports specifying backward compatible or no change;
previous versions always wrote v3+ CDFs (even if ``False``).
TT2000 : bool (optional)
Specify type for variables with names beginning 'Epoch'. Default
CDF_EPOCH for backward-compatible CDF (``backward`` True) and
CDF_TT2000 otherwise (``backward`` False or unspecified).
.. versionchanged:: 0.5.0
Current handling introduced.
.. versionchanged:: 0.3.0
Always write TT2000 variables (due to change in :mod:`~.pycdf`).
verbose : bool (optional)
verbosity flag
Returns
-------
None
Notes
-----
.. versionchanged:: 0.5.0
Invalid keyword arguments now raise :exc:`TypeError` rather than being ignored.
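Examples
--------
A minimal sketch with hypothetical data (requires the NASA CDF
library; the file name is illustrative):
>>> import datetime
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData()
>>> data['Epoch'] = dm.dmarray([datetime.datetime(2010, 1, 1)])
>>> data['flux'] = dm.dmarray([1.], attrs={'DEPEND_0': 'Epoch'})
>>> dm.toCDF('test.cdf', data)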
'''
if flatten:
SDobject = SDobject.flatten()
if overwrite:
raise NotImplementedError('Overwriting CDFs is not currently enabled '
'- please remove the file manually')
if TT2000 and backward:
raise ValueError('Cannot use TT2000 in backward-compatible CDF.')
try:
from spacepy import pycdf
except ImportError:
raise ImportError("CDF converter requires NASA CDF library and"
" SpacePy's pyCDF")
if backward is None:
former_backward = None
else:
former_backward = pycdf.lib.set_backward(backward)
force_epoch = not backward and TT2000 is False # backward defaults falsey
with pycdf.CDF(fname, skeleton) as outdata:
if hasattr(SDobject, 'attrs'):
for akey in SDobject.attrs:
outdata.attrs[akey] = dmcopy(SDobject.attrs[akey])
varLengths = [len(SDobject[var]) for var in SDobject]
modeLength = next(itertools.groupby((reversed(sorted(varLengths)))))[0]
for key, val in SDobject.items():
if isinstance(val, dict):
raise TypeError('This data structure appears to be nested,'
' please try spacepy.datamodel.flatten')
if not skeleton:
if not val.shape:
shape_tup = -1
else:
shape_tup = val.shape
if 'Epoch' not in SDobject:
NRVtest = modeLength
else:
NRVtest = len(SDobject['Epoch'])
if shape_tup[0] != NRVtest: #naive check for 'should-be' NRV
try:
v = outdata.new(key, val[...], recVary=False)
if verbose:
print('{0} is being made NRV'.format(key))
v.attrs = dmcopy(val.attrs)
except ValueError:
v = outdata.new(key, val.tolist(), recVary=False)
v.attrs = dmcopy(val.attrs)
if force_epoch and 'Epoch' in key:
outdata.new(key, val[...], type=pycdf.const.CDF_EPOCH)
else:
try:
outdata[key] = val
except ValueError:
try:
outdata[key] = dmarray(
[val.tolist()], attrs=dmcopy(val.attrs)).squeeze()
except UnicodeEncodeError:
tmpAttrs = dmcopy(val.attrs)
for akey in tmpAttrs:
try: #strings
if hasattr(tmpAttrs[akey], 'encode'):
tmpAttrs[akey] = tmpAttrs[akey].encode('utf-8')
else:
tmpAttrs[akey] = tmpAttrs[akey]
except AttributeError: #probably a list of strings
for id, el in enumerate(tmpAttrs[akey]):
tmpAttrs[akey][id] = el.encode('utf-8')
else:
outdata[key][...] = val[...]
for akey in outdata[key].attrs:
try:
outdata[key].attrs[akey] = dmcopy(val.attrs[akey])
except ValueError:
outdata[key][...] = dmarray([val.tolist()], attrs=dmcopy(val.attrs))
except KeyError:
pass
if former_backward is not None:
pycdf.lib.set_backward(former_backward)
def fromHDF5(fname, **kwargs):
'''
Create a SpacePy datamodel representation of an HDF5 file or netCDF4 file which is HDF5 compliant
Parameters
----------
fname : str
the name of the HDF5/netCDF4 file to be loaded into a datamodel
Returns
-------
out : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Examples
--------
>>> import spacepy.datamodel as dm
>>> data = dm.fromHDF5('test.hdf')
Notes
-----
Zero-sized datasets will break in h5py. This is kluged by returning a
dmarray containing a None.
This function is expected to work with any HDF5-compliant files, including
netCDF4 (not netCDF3) and MatLab save files from v7.3 or later, but some
datatypes are not supported, e.g., non-string vlen datatypes, and will
raise a warning.
'''
def hdfcarryattrs(SDobject, hfile, path):
if hasattr(hfile[path], 'attrs'):
#for key, value in hfile[path].attrs.iteritems():
for key in hfile[path].attrs:
try:
value = hfile[path].attrs[key]
except TypeError:
warnings.warn('Unsupported datatype in dataset {}.attrs[{}]'.format(path, key), stacklevel=2)
continue
try:
SDobject.attrs[key] = value
except:
warnings.warn('The following key:value pair is not permitted\n' +
'key = {0} ({1})\n'.format(key, type(key)) +
'value = {0} ({1})'.format(value, type(value)),
DMWarning, stacklevel=2)
try:
import h5py
except ImportError:
raise ImportError('HDF5 converter requires h5py')
if isinstance(fname, str_classes):
hfile = h5py.File(fname, mode='r')
else:
hfile = fname
#should test here for HDF file object
path = kwargs.get('path', '/')
SDobject = SpaceData()
allowed_elems = [h5py.Group, h5py.Dataset]
##carry over the attributes
hdfcarryattrs(SDobject, hfile, path)
##carry over the groups and datasets
for key, value in hfile[path].items():
if isinstance(value, allowed_elems[0]): # if a group
SDobject[key] = fromHDF5(hfile, path=path+'/'+key)
elif isinstance(value, allowed_elems[1]): # if a dataset
isuni = h5py.check_vlen_dtype(value.dtype) is str
try:
if isuni:
if hasattr(value, 'asstr'): # h5py 3+
value = value.asstr()
value = numpy.require(value[...], dtype=str)
SDobject[key] = dmarray(value)
except (TypeError, ZeroDivisionError): #ZeroDivisionError catches zero-sized DataSets
SDobject[key] = dmarray(None)
hdfcarryattrs(SDobject[key], hfile, path+'/'+key)
if path == '/':
hfile.close()
return SDobject
def toHDF5(fname, SDobject, **kwargs):
'''
Create an HDF5 file from a SpacePy datamodel representation
Parameters
----------
fname : str
Filename to write to
SDobject : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Other Parameters
----------------
overwrite : bool (optional)
allow overwrite of an existing target file (default True)
mode : str (optional)
HDF5 file open mode (a, w, r) (default 'a')
compression : str (optional)
compress all non-scalar variables using this method (default None)
(gzip, shuffle, fletcher32, szip, lzf)
.. versionchanged:: 0.4.0
No longer compresses scalars (which usually fails).
compression_opts : str (optional)
options to the compression, see h5py documentation for more details
Returns
-------
None
Examples
--------
>>> import spacepy.datamodel as dm
>>> a = dm.SpaceData()
>>> a['data'] = dm.dmarray(range(100000), dtype=float)
>>> dm.toHDF5('test_gzip.h5', a, overwrite=True, compression='gzip')
>>> dm.toHDF5('test.h5', a, overwrite=True)
>>> # test_gzip.h5 was 118k, test.h5 was 785k
'''
def SDcarryattrs(SDobject, hfile, path, allowed_attrs):
if hasattr(SDobject, 'attrs'):
for key, value in SDobject.attrs.items():
dumval, dumkey = copy.copy(value), copy.copy(key)
if isinstance(value, allowed_attrs):
#test for datetimes in iterables
if hasattr(value, '__iter__') and not isinstance(value, str_classes):
dumval = [b.isoformat() if isinstance(b, datetime.datetime) else b for b in value]
truth = False
try:
if value.nbytes: truth = True #empty arrays of any dimension are nbytes=0
except AttributeError: #not an array
if value or value == 0: truth = True
if truth:
uni = False #No special unicode handling
dumval = numpy.asanyarray(dumval)
if dumval.size and dumval.dtype.kind == 'U':
uni = True #Unicode list, special handling
try:
if uni:
#Tell hdf5 this is unicode. Numpy is UCS-4, HDF5 is UTF-8
hfile[path].attrs.create(
dumkey, dumval, dtype=h5py.string_dtype(encoding='utf-8'))
else:
hfile[path].attrs[dumkey] = dumval
except TypeError:
hfile[path].attrs[dumkey] = str(dumval)
warnings.warn(
'The following value is not permitted\n' +
'key, value, type = {0}, {1}, {2})\n'.format(
key, value, type(value)) +
'value has been converted to a string for output',
DMWarning, stacklevel=2)
else:
hfile[path].attrs[dumkey] = ''
elif isinstance(value, datetime.datetime):
dumval = value.isoformat()
hfile[path].attrs[dumkey] = dumval
else:
#TODO: add support for arrays(?) in attrs (convert to isoformat)
warnings.warn('The following key:value pair is not permitted\n' +
'key = {0} ({1})\n'.format(key, type(key)) +
'value type {0} is not in the allowed attribute list'.format(type(value)),
DMWarning, stacklevel=2)
try:
import h5py
except ImportError:
raise ImportError('h5py is required to use HDF5 files')
if not isinstance(SDobject, SpaceData):
raise ValueError("Input data is not of type SpaceData, check usage:"
" toHDF5(fname, datamodel)")
#mash these into a defaults dict...
wr_mo = kwargs.get('mode', 'a')
h5_compr_type = kwargs.get('compression', None)
if h5_compr_type not in ['gzip', 'szip', 'lzf', 'shuffle', 'fletcher32', None]:
raise NotImplementedError('Specified compression type not supported')
h5_compr_opts = None if h5_compr_type == 'lzf'\
else kwargs.get('compression_opts', None)
if 'overwrite' not in kwargs:
kwargs['overwrite'] = True
if isinstance(fname, str_classes):
if os.path.isfile(fname):
if kwargs['overwrite']:
os.remove(fname)
else:
raise IOError('Cannot write HDF5, file exists (see overwrite) "{!s}"'.format(fname))
hfile = h5py.File(fname, mode=wr_mo)
must_close = True
else:
hfile = fname
#should test here for HDF file object
must_close = False
path = kwargs.get('path', '/')
allowed_attrs = [int, float, bytes, str, numpy.ndarray, list, tuple, numpy.bytes_]
allowed_attrs.extend([
numpy.dtype(v).type
for v in itertools.chain(numpy.typecodes['AllInteger'], numpy.typecodes['AllFloat'])])
allowed_attrs = tuple(allowed_attrs)
allowed_elems = (SpaceData, dmarray)
#first convert non-string keys to str
SDobject = convertKeysToStr(SDobject)
SDcarryattrs(SDobject, hfile, path, allowed_attrs)
try:
for key, value in SDobject.items():
if isinstance(value, allowed_elems[0]):
hfile[path].create_group(key)
toHDF5(
hfile, value, path=path + '/' + key,
compression=h5_compr_type, compression_opts=h5_compr_opts)
elif isinstance(value, allowed_elems[1]):
comptype, compopts = (None, None) if value.shape == ()\
else (h5_compr_type, h5_compr_opts)
try:
hfile[path].create_dataset(key, data=value,
compression=comptype, compression_opts=compopts)
except:
dumval = numpy.asanyarray(value.copy())
dtype = None
if dumval.dtype.kind == 'U':
dumval = numpy.char.encode(dumval, 'utf-8')
dtype = h5py.string_dtype(encoding='utf-8')
elif isinstance(value[0], datetime.datetime):
for i, val in enumerate(value):
dumval[i] = val.isoformat()
dumval = dumval.astype('|S35')
else:
dumval = dumval.astype('|S35')
hfile[path].create_dataset(key, data=dumval, compression=comptype,
compression_opts=compopts, dtype=dtype)
#else:
# hfile[path].create_dataset(key, data=value.astype(float))
SDcarryattrs(SDobject[key], hfile, path+'/'+key, allowed_attrs)
else:
warnings.warn('The following data is not being written as is not of an allowed type\n' +
'key = {0} ({1})\n'.format(key, type(key)) +
'value type {} is not in the allowed data type list'.format(
type(value)), DMWarning, stacklevel=2)
finally:
if must_close:
hfile.close()
def fromNC3(fname):
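'''
Create a SpacePy datamodel representation of a NetCDF3 file
Parameters
----------
fname : str
the name of the NetCDF3 file to be loaded into a datamodel
Returns
-------
out : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
'''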
try:
from scipy.io import netcdf as nc
except ImportError:
raise ImportError('SciPy is required to import netcdf3')
ncfile = nc.netcdf_file(fname, mode='r', mmap=False)
SDobject = SpaceData(attrs=dmcopy(ncfile._attributes))
##carry over the groups and datasets
for key, value in ncfile.variables.items():
#try:
SDobject[key] = dmarray(dmcopy(value.data), attrs=dmcopy(value._attributes))
#except (TypeError, ZeroDivisionError): #ZeroDivisionError catches zero-sized DataSets
# SDobject[key] = dmarray(None)
ncfile.close()
return SDobject
def toHTML(fname, SDobject, attrs=(),
varLinks=False, echo=False, tableTag='<table border="1">'):
"""
Create an HTML dump of the structure of a SpaceData
Parameters
----------
fname : str
Filename to write to
SDobject : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Other Parameters
----------------
attrs : tuple of str (optional)
attribute names from each variable to include as table columns
echo : bool
echo the html to the screen
varLinks : bool
make the variable name a link to a stub page
tableTag : str (optional)
HTML tag used to open the table (default '<table border="1">')
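Examples
--------
A minimal sketch with a hypothetical SpaceData and output file name:
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData(Var1=dm.dmarray([1, 2], attrs={'CATDESC': 'Example'}))
>>> dm.toHTML('dump.html', data, attrs=('CATDESC',))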
"""
output = io.StringIO() # put the output into a StringIO
keys = sorted(SDobject.keys())
output.write(tableTag)
output.write('\n')
output.write('<tr><th>{0}</th>'.format('Variable'))
for attr in attrs:
output.write('<th>{0}</th>'.format(attr))
output.write('</tr>')
for ii, key in enumerate(keys):
if ii % 2 == 0:
output.write('<tr>')
else:
output.write('<tr class="alt">')
output.write('<td>')
if varLinks:
output.write('<a href="{0}.html">'.format(key))
output.write('{0}'.format(key))
if varLinks:
output.write('</a>')
output.write('</td>')
for attr in attrs:
try:
if not isinstance(SDobject[key].attrs[attr], str):
tmp = str(SDobject[key].attrs[attr])
output.write('<td>{0}</td>'.format(_idl2html(tmp)))
else:
output.write('<td>{0}</td>'.format(_idl2html(SDobject[key].attrs[attr])))
except KeyError:
output.write('<td></td>')
output.write('</tr>\n')
output.write('</table>\n')
with open(fname, 'w') as fp:
fp.write(output.getvalue())
if echo:
print(output.getvalue())
output.close()
def _idl2html(idl):
"""
given an IDL format string for text, change it to HTML
Parameters
==========
idl : str
IDL-formatted string
Returns
=======
out : str
html formatted string
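Examples
========
For example, an IDL exponent becomes an HTML superscript:
>>> _idl2html('cm!E2!N')
'cm<sup>2</sup>'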
"""
html = idl
conv = {'!!': '!',
'!E': '<sup>',
'!I': '<sub>'}
while True: # hate it but for now
ind = html.find('!')
if ind == -1:
break
code = html[ind:ind+2]
html = html.replace(code, conv[code])
if code == '!I':
if '!N' in html:
html = html.replace('!N', '</sub>', 1) # just replace 1
else:
html = html + '</sub>'
elif code == '!E':
if '!N' in html:
html = html.replace('!N', '</sup>', 1) # just replace 1
else:
html = html + '</sup>'
return html
def readJSONheadedASCII(fname, mdata=None, comment='#', convert=False, restrict=None):
"""read JSON-headed ASCII data files into a SpacePy datamodel
Parameters
----------
fname : str or list
Filename(s) to read data from
.. versionchanged:: 0.5.0
Filename can now be a .gz to indicate the file is gzipped
Other Parameters
----------------
mdata : spacepy.datamodel.SpaceData (optional)
supply metadata object, otherwise is read from fname (default None)
comment: str (optional)
comment string in file to be read; lines starting with comment are
ignored (default '#')
convert: bool or dict-like (optional)
If True, uses common names to try conversion from string. If a dict-
like then uses the functions specified as the dict values to convert
each element of 'key' to a non-string
restrict: list of strings (optional)
If present, restrict the variables stored to only those on this list
Returns
-------
mdata: spacepy.datamodel.SpaceData
SpaceData with the data and metadata from the file
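Examples
--------
A minimal sketch, assuming ``datafile.txt`` is a JSON-headed ASCII
file with a ``DateTime`` column (the file name is illustrative):
>>> import spacepy.datamodel as dm
>>> data = dm.readJSONheadedASCII('datafile.txt', convert=True)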
"""
import dateutil.parser as dup
filelike = False
if isinstance(fname, str_classes):
fname = [fname]
elif hasattr(fname, 'readlines'):
fname = [fname]
filelike = True
if not mdata:
mdata = readJSONMetadata(fname[0])
if restrict:
delkeys = [kk for kk in mdata.keys() if kk not in restrict]
for val in delkeys:
del mdata[val] #remove undesired keys
mdata_copy = dmcopy(mdata)
def innerloop(fh, mdata, mdata_copy):
line = fh.readline()
line = line.decode('latin1')
while (line and line[0] == comment):
line = fh.readline()
line = line.decode('latin1')
fh.seek(-len(line), os.SEEK_CUR) # fixes the missing first data bug
alldata = fh.readlines()
if not alldata:
return mdata
alldata = [d.decode('latin1') for d in alldata]
ncols = len(alldata[0].rstrip().split())
# fixes None in the data from empty lines at the end
for row in range(len(alldata)): # reverse order
if not alldata[-1].rstrip(): # blank line (or all white space)
alldata.pop(-1)
else:
break
nrows = len(alldata)
data = numpy.empty((nrows, ncols), dtype=object)
for ridx, line in enumerate(alldata):
for cidx, el in enumerate(line.rstrip().split()):
data[ridx, cidx] = el
for key in mdata_copy.keys():
if 'START_COLUMN' in mdata_copy[key].attrs:
st = mdata_copy[key].attrs['START_COLUMN']
if 'DIMENSION' in mdata_copy[key].attrs:
varDims = numpy.array(mdata_copy[key].attrs['DIMENSION'])
if not varDims.shape:
varDims = numpy.array([varDims])
singleDim = True
if len(varDims) > 1 or varDims[0] > 1:
singleDim = False
if ('DIMENSION' in mdata_copy[key].attrs) and not singleDim:
en = int(mdata_copy[key].attrs['DIMENSION'][0]) + int(st)
try:
assert mdata[key] == {}
mdata[key] = data[:, int(st):int(en)]
except (AssertionError, ValueError):
mdata[key] = numpy.vstack((mdata[key], data[:, int(st):int(en)]))
else:
try:
assert mdata[key] == {}
mdata[key] = data[:, int(st)]
except (AssertionError, ValueError):
mdata[key] = numpy.hstack((mdata[key], data[:, int(st)]))
return mdata
for fn in fname:
if not filelike:
if fn.endswith('.gz'):
with gzip.open(filename=fn) as gzh:
mdata = innerloop(gzh, mdata, mdata_copy)
else:
with open(fn, 'rb') as fh: # fixes windows bug with seek()
mdata = innerloop(fh, mdata, mdata_copy)
else:
mdata = innerloop(fh, mdata, mdata_copy)
#now add the attributes to the variables
keys = list(mdata_copy.keys())
for key in keys:
if isinstance(mdata[key], SpaceData):
mdata[key] = dmarray(None, attrs=mdata_copy[key].attrs)
else:
mdata[key] = dmarray(mdata[key], attrs=mdata_copy[key].attrs)
if convert:
if isinstance(convert, dict):
conversions = convert
else:
conversions = {'DateTime': lambda x: dup.parse(x, ignoretz=True),
'ExtModel': str}
for conkey in conversions:
try:
name = keys.pop(keys.index(conkey)) #remove from keylist
except ValueError:
warnings.warn('Key {} for conversion not found in file'.format(conkey),
UserWarning, stacklevel=2)
continue
for i, element in numpy.ndenumerate(mdata[name]):
mdata[name][i] = conversions[name](element)
for remkey in keys:
try:
mdata[remkey] = numpy.asanyarray(mdata[remkey], dtype=float)
except ValueError:
pass #this will skip any unspecified string fields
return mdata
def _dateToISO(indict):
"""
covert datetimes to iso strings inside of datamodel attributes
"""
retdict = dmcopy(indict)
if isinstance(indict, dict):
for key in indict:
if isinstance(indict[key], datetime.datetime):
retdict[key] = retdict[key].isoformat()
elif hasattr(indict[key], '__iter__'):
for idx, el in enumerate(indict[key]):
if isinstance(el, datetime.datetime):
retdict[key][idx] = el.isoformat()
else:
if isinstance(indict, datetime.datetime):
retdict = retdict.isoformat()
elif hasattr(indict, '__iter__'):
retdict = numpy.asanyarray(retdict)
for idx, el in numpy.ndenumerate(indict):
if isinstance(el, datetime.datetime):
retdict[idx] = el.isoformat()
return retdict
def toJSONheadedASCII(fname, insd, metadata=None, depend0=None, order=None, **kwargs):
'''Write JSON-headed ASCII file of data with metadata from SpaceData object
Parameters
----------
fname : str
Filename to write to (can also use a file-like object)
None can be given in conjunction with the returnString keyword to skip writing output
insd : spacepy.datamodel.SpaceData
SpaceData with associated attributes and variables in dmarrays
Other Parameters
----------------
depend0 : str (optional)
variable name to use to indicate parameter on which other data depend (e.g. Time)
order : list (optional)
list of key names in order of start column in output JSON file
metadata: str or file-like (optional)
filename with JSON header to use (or file-like with JSON metadata)
delimiter: str
delimiter to use in ASCII output (default is whitespace), for tab, use '\t'
Returns
-------
None
Examples
--------
>>> import spacepy.datamodel as dm
>>> data = dm.SpaceData()
>>> data.attrs['Global'] = 'A global attribute'
>>> data['Var1'] = dm.dmarray([1,2,3,4,5], attrs={'Local1': 'A local attribute'})
>>> data['Var2'] = dm.dmarray([[8,9],[9,1],[3,4],[8,9],[7,8]])
>>> data['MVar'] = dm.dmarray([7.8], attrs={'Note': 'Metadata'})
>>> dm.toJSONheadedASCII('outFile.txt', data, depend0='Var1', order=['Var1'])
#Note that not all field names are required, those not given will be listed
#alphabetically after those that are specified
'''
kwarg_dict = {'delimiter': ' '}
for key in kwarg_dict:
if key in kwargs:
kwarg_dict[key] = kwargs[key]
if not metadata:
metadata = io.StringIO()
writeJSONMetadata(metadata, insd, depend0=depend0, order=order)
metadata.seek(0) #rewind StringIO object to start
hdr = readJSONMetadata(metadata)
datlist = []
for key in hdr:
if 'START_COLUMN' in hdr[key].attrs:
#add to list of (start_col, keyname) pairs
datlist.append((hdr[key].attrs['START_COLUMN'], key, hdr[key].attrs['DIMENSION'][0]))
#also use for data length
datlen = len(insd[key])
if datlen == 0:
raise ValueError('No data present to write: use writeJSONMetadata')
#TODO: Set this to just default to writing the header out and raise a warning
datlist.sort()
ncols = datlist[-1][0]+datlist[-1][-1]
#now open file (file-like) and for each line in len(data)
#write the line using start_column, name, dimension
data = numpy.zeros([datlen, ncols], dtype=object)
for stcol, name, dim in datlist:
if dim == 1:
data[:, stcol] = _dateToISO(insd[name])
else:
data[:, stcol:stcol+dim] = _dateToISO(insd[name])
hdstr = writeJSONMetadata(None, hdr, depend0=depend0, order=order, returnString=True)
with open(fname, 'w') as fh:
fh.writelines(hdstr)
for line in data:
prline = kwarg_dict['delimiter'].join([str(el) for el in line])
fh.write(''.join([prline, '\n']))
def fromRecArray(recarr):
'''Takes a numpy recarray and returns each field as a dmarray in a SpaceData container
Parameters
----------
recarr : numpy record array
object to parse into SpaceData container
Returns
-------
sd: spacepy.datamodel.SpaceData
dict-like containing arrays of named records in recarr
Examples
--------
>>> import numpy as np
>>> import spacepy.datamodel as dm
>>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
>>> print(x, x.dtype)
array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i4')])
>>> sd = dm.fromRecArray(x)
>>> sd.tree(verbose=1)
+
|____x (spacepy.datamodel.dmarray (2,))
|____y (spacepy.datamodel.dmarray (2,))
'''
sd = SpaceData()
for key in recarr.dtype.fields.keys():
sd[key] = dmarray(recarr[key])
return sd
def toRecArray(sdo):
'''Takes a SpaceData and creates a numpy recarray
Parameters
----------
sdo : SpaceData
SpaceData to change to a numpy recarray
Returns
-------
recarr: numpy record array
numpy.recarray object with the same values (attributes are lost)
Examples
--------
>>> import numpy as np
>>> import spacepy.datamodel as dm
>>> sd = dm.SpaceData()
>>> sd['x'] = dm.dmarray([1.0, 2.0])
>>> sd['y'] = dm.dmarray([2,4])
>>> sd.tree(verbose=1)
+
|____x (spacepy.datamodel.dmarray (2,))
|____y (spacepy.datamodel.dmarray (2,))
>>> ra = dm.toRecArray(sd)
>>> print(ra, ra.dtype)
[(2, 1.0) (4, 2.0)] (numpy.record, [('y', '<i8'), ('x', '<f8')])
'''
nametype = numpy.dtype([(k, sdo[k].dtype.str) for k in sdo])
recarr = numpy.rec.fromarrays([sdo[k] for k in sdo], dtype=nametype)
return recarr
def dmcopy(dobj):
'''Generic copy utility to return a copy of a (datamodel) object
Parameters
----------
dobj : object
object to return a copy of
Returns
-------
copy_obj: object (same type as input)
copy of input object
Examples
--------
>>> import spacepy.datamodel as dm
>>> dat = dm.dmarray([2,3], attrs={'units': 'T'})
>>> dat1 = dm.dmcopy(dat)
>>> dat1.attrs['copy'] = True
>>> dat is dat1
False
>>> dat1.attrs
{'copy': True, 'units': 'T'}
>>> dat.attrs
{'units': 'T'}
'''
if isinstance(dobj, (SpaceData, dmarray)):
return copy.deepcopy(dobj)
if isinstance(dobj, numpy.ndarray):
return numpy.copy(dobj)
return copy.copy(dobj)
def createISTPattrs(datatype, ndims=1, vartype=None, units=' ', NRV=False):
'''Return set of unpopulated attributes for ISTP compliant variable
Parameters
----------
datatype : {'data', 'support_data', 'metadata'}
datatype of variable to create metadata for.
ndims : int
number of dimensions, default=1
vartype : {'float', 'char', 'int', 'epoch', 'tt2000'}
The type of the variable, default=float
units : str
The units of the variable, default=' '
NRV : bool
Is the variable NRV (non-record varying), default=False
Returns
-------
attrs : dict
dictionary of attributes for the variable
Examples
--------
>>> import spacepy.datamodel as dm
>>> dm.createISTPattrs('data', ndims=2, vartype='float', units='MeV')
{'CATDESC': '',
'DISPLAY_TYPE': 'spectrogram',
'FIELDNAM': '',
'FILLVAL': -1e+31,
'FORMAT': 'F18.6',
'LABLAXIS': '',
'SI_CONVERSION': ' > ',
'UNITS': 'MeV',
'VALIDMIN': '',
'VALIDMAX': '',
'VAR_TYPE': 'data',
'DEPEND_0': 'Epoch',
'DEPEND_1': ''}
'''
fillvals = {'float': -1e31,
'char': '',
'int': numpy.array(-2147483648).astype(numpy.int32),
'epoch': -1.0E31, # datetime.datetime(9999,12,31,23,59,59,999)
'tt2000': numpy.array(-9223372036854775808).astype(numpy.int64)}
formats = {'float': 'F18.6',
'char': 'A30',
'int': 'I11',
'epoch': '',
'tt2000': 'I21'}
disp = {1: 'time_series',
2: 'spectrogram',
3: 'spectrogram',
4: 'spectrogram'}
if vartype not in fillvals:
fill = -1e31
form = 'F15.6'
else:
fill = fillvals[vartype]
form = formats[vartype]
unit = units
if datatype == 'data':
attrs = {
'CATDESC': '',
'DISPLAY_TYPE': disp[ndims],
'FIELDNAM': '',
'FILLVAL': fill,
'FORMAT': form,
'LABLAXIS': '',
'SI_CONVERSION': ' > ',
'UNITS': unit,
'VALIDMIN': '',
'VALIDMAX': '',
'VAR_TYPE': 'data'
}
for dim in range(ndims):
attrs['DEPEND_{0}'.format(dim)] = ''
attrs['DEPEND_0'] = 'Epoch'
elif datatype == 'support_data':
attrs = {
'CATDESC': '',
'FIELDNAM': '',
'FORMAT': form,
'UNITS': unit,
'VAR_TYPE': 'support_data'
}
for dim in range(ndims):
attrs['DEPEND_{0}'.format(dim)] = ''
if not NRV:
attrs['VALIDMIN'] = ''
attrs['VALIDMAX'] = ''
attrs['FILLVAL'] = fill
attrs['DEPEND_0'] = 'Epoch'
else:
del attrs['DEPEND_0']
elif datatype == 'metadata':
attrs = {
'CATDESC': '',
'FIELDNAM': '',
'FORMAT': form,
'UNITS': unit,
'VAR_TYPE': 'metadata'
}
for dim in range(ndims):
attrs['DEPEND_{0}'.format(dim)] = ''
if not NRV:
attrs['FILLVAL'] = fill
attrs['DEPEND_0'] = 'Epoch'
else:
del attrs['DEPEND_0']
else:
raise ValueError("Invalid datatype (data|support_data|metadata)")
return attrs
def _getVarLengths(data):
"""
get the length of all the variables
Parameters
----------
data : SpaceData
SpaceData object to return the length of the variables
Returns
-------
data : dict
dict of the names and lengths of a SpaceData
"""
ans = {}
for k, v in data.items():
ans[k] = len(v)
return ans
def resample(data, time=None, winsize=0, overlap=0, st_time=None, outtimename='Epoch'):
"""
resample a SpaceData to a new time interval
Parameters
----------
data : SpaceData or dmarray
SpaceData or dmarray with data to resample; variables can only be
1d or 2d. If time is specified, only variables with the same length
as time are resampled; otherwise only variables with length equal
to the longest variable are resampled
time : array-like
dmarray of times that correspond to the data
winsize : datetime.timedelta
Time frame to average the data over
overlap : datetime.timedelta
Overlap in the moving average
st_time : datetime.datetime
Starting time for the resample, if not specified the time of the first
data point is used (see spacepy.toolbox.windowMean)
Returns
-------
ans : SpaceData
Resampled data; included keys are those of the input (with the data
caveats above) plus Epoch, which contains the output times
Examples
--------
>>> import datetime
>>> import spacepy.datamodel as dm
>>> a = dm.SpaceData()
>>> a.attrs['foo'] = 'bar'
>>> a['a'] = dm.dmarray(range(10*2)).reshape(10,2)
>>> a['b'] = dm.dmarray(range(10)) + 4
>>> a['c'] = dm.dmarray(range(3)) + 10
>>> times = [datetime.datetime(2010, 1, 1) + datetime.timedelta(hours=i) for i in range(10)]
>>> out = dm.resample(a, times, winsize=datetime.timedelta(hours=2), overlap=datetime.timedelta(hours=0))
>>> out.tree(verbose=1, attrs=1)
# +
# :|____foo (str [3])
# |____Epoch (spacepy.datamodel.dmarray (4,))
# |____a (spacepy.datamodel.dmarray (4, 2))
# :|____DEPEND_0 (str [5])
#
# Things to note:
# - attributes are preserved
# - the output variables have their DEPEND_0 changed to Epoch (or outtimename)
# - each dimension of a 2d array is resampled individually
"""
from . import toolbox
# check for SpaceData or dmarray input before going to a bunch of work
if not isinstance(data, (SpaceData, dmarray)):
raise TypeError('Input must be a SpaceData or dmarray object')
if time is None:
time = []
# can only resample variables that have the same length as time,
# if time is default then use all the vals that are the same
# as the longest var
lent = len(time)
if lent == 0:
lent = len(data[max(data, key=lambda k: len(data[k]))])
keys = [k for k in data if len(data[k]) == lent]
# what time are we starting at?
try:
t_int = time.UTC
except AttributeError:
t_int = dmarray(time)
if t_int.any() and ((st_time is None) and isinstance(t_int[0], datetime.datetime)):
st_time = t_int[0].replace(hour=0, minute=0, second=0, microsecond=0)
ans = SpaceData()
ans.attrs = data.attrs
for k in keys:
if len(data[k].shape) > 1:
if len(data[k].shape) > 2:
raise IndexError("Variables can only be 1d or 2d")
for i in range(data[k].shape[1]):
d, t = toolbox.windowMean(data[k][:, i], time=t_int, winsize=winsize,
overlap=overlap, st_time=st_time)
if k not in ans:
ans[k] = dmarray(d)
else:
ans[k] = dmarray.vstack(ans[k], d)
ans[k] = ans[k].T
else:
d, t = toolbox.windowMean(data[k], time=t_int, winsize=winsize,
overlap=overlap, st_time=st_time)
ans[k] = dmarray(d)
try:
ans[k].attrs = data[k].attrs
except AttributeError: # was not a dmarray
pass
ans[k].attrs['DEPEND_0'] = outtimename
ans[outtimename] = dmarray(t)
return ans