Source code for spacepy.datamodel

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data model conceptually similar to HDF5 and CDF.

For more documentation :doc:`../datamodel`

Authors: Steve Morley and Brian Larsen

Additional Contributors: Charles Kiyanda and Miles Engel

Institution: Los Alamos National Laboratory

Contact: smorley@lanl.gov; balarsen@lanl.gov

Copyright 2010-2016 Los Alamos National Security, LLC.

"""

import collections.abc
import copy
import datetime
import gzip
import io
import itertools
import json
from functools import partial
import os
import re
import warnings

import numpy
# from . import toolbox # handled in functions that use it


__contact__ = 'Steve Morley, smorley@lanl.gov'

str_classes = (str, bytes)


[docs]
class DMWarning(Warning):
    """
    Warnings class for datamodel, subclassed so it can be set to always
    """
    pass

warnings.simplefilter('always', DMWarning)


[docs]
class MetaMixin(object):
    """Mixin class that adds a 'meta' attribute that acts like 'attrs'

    Recommendation from the Python Heliophysics community is to allow
    access to metadata via either an ``attrs`` attribute or ``meta``.
    This mixin class supports that recommendation.
    """

    @property
    def meta(self):
        """Equivalent to ``attrs``

        Some APIs use ``attrs`` for metadata; some use ``meta``. This
        is a convenience property to make it easier for those familiar
        with the ``meta`` convention.
        """
        return self.attrs

    @meta.setter
    def meta(self, v):
        """Set meta as with attrs"""
        self.attrs = v

    @meta.deleter
    def meta(self):
        """Remove meta (and thus attrs)

        This isn't a good idea but you can do it with attrs,
        so might as well support it in meta.

        This still leaves the meta property hanging around, but
        cannot delete a property from an instance (have to delete
        from the class).
        """
        del self.attrs




[docs]
class ISTPArray:
    """Mixin class for array using ISTP metadata.

    Array types like `dmarray` provide all these methods; they assume
    attributes of the array use the
    `ISTP metadata standard <https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html>`_
    and are unlikely to give good results if that is not the case.

    Note that some operations that may seem to relate to an array (e.g.
    uncertainties) may require the use of other arrays in a container;
    these are in `ISTPContainer`.

    .. versionadded:: 0.5.0
    """
    attrs: collections.abc.Mapping


[docs]
    def replace_invalid(self):
        """Return data from array with invalid values replaced by `~numpy.nan`.

        Makes a copy of the data and, for any values equal to the
        ``FILLLVAL`` attribute, greater than ``VALIDMAX``, or less than
        ``VALIDMIN``, replace with NaN.

        Returns
        -------
        `~numpy.ndarray`
            Transformed data

        See Also
        --------
        .pycdf.istp.nanfill : an in-place variant

        Notes
        -----
        .. versionadded:: 0.5.0

        Comparisons with ``FILLVAL`` are done using `~numpy.isclose` and
        so may replace values that are near, but not identical, to fill.
        """
        data = numpy.array(self)
        idx = numpy.zeros_like(data, dtype=bool)
        if self.attrs.get('FILLVAL') is not None:
            idx |= numpy.isclose(data, self.attrs['FILLVAL'])
        if self.attrs.get('VALIDMIN') is not None:
            idx |= data < self.attrs['VALIDMIN']
        if self.attrs.get('VALIDMAX') is not None:
            idx |= data > self.attrs['VALIDMAX']
        data[idx] = numpy.nan
        return data



[docs]
    def plot_as_line(self):
        """Determines if this array is better plotted as a lineplot or spectrogram.

        Uses array shape and the ``DISPLAY_TYPE`` attribute to determine
        if should be plotted as a lineplot (potentially stacked) or spectrogram.

        Returns
        -------
        `bool`
            ``True`` if should be a lineplot, ``False`` if should be a
            spectrogram

        Notes
        -----
        .. versionadded:: 0.5.0
        """
        if 'DISPLAY_TYPE' in self.attrs:
            return self.attrs['DISPLAY_TYPE'] == 'time_series'
        dims = len(self.shape)
        if dims == 1:
            return True
        if dims > 2:
            return True
        # Reasonable dividing line is probably 4 stacked line plots
        return self.shape[-1] < 5



[docs]
    def units(self, fmt='minimal'):
        """Finds units of array.

        Looks up the unit attribute and performs minor cleanup (including
        intepreting IDL codes).

        Returns
        -------
        `str`
            Physical units of this array. `None` if not present.

        Parameters
        ----------
        fmt : {'minimal', 'latex', 'astropy', 'raw'}
            How to format the units: ``minimal`` (default) is a
            minimally-processed rendering, ``latex`` is in LaTeX,
            ``astropy`` is meant to give good results when passed to
            `astropy.units.Unit`, and ``raw`` has no processing. No
            checks are done on processing for AstroPy or LaTeX, and it
            should not be assumed they will parse.

        Notes
        -----
        .. versionadded:: 0.6.0
        """
        u = self.attrs.get('UNITS', None)
        if fmt == 'raw' or u is None:
            return u
        if fmt == 'astropy':
            u = u.replace('#', '1')
        elif fmt == 'latex':
            u = u.replace('#', '\\#')
        if fmt in ('minimal', 'astropy'):
            u = re.sub(r'![EU]([^!]*)!N', r'^\1', u)  # IDL to exponent
            u = re.sub(r'\^{([^!]*)}', r'^\1', u)  # LaTeX to exponent
        if fmt == 'minimal':
            u = re.sub(r'(?<=\d)(?=[\w^_])', r' ', u)  # Insert spaces
        if fmt == 'astropy':
            # Common substitutions
            for orig, ap in (('ster', 'sr'),
                             ('cc', 'cm^3'),
                             ):
                u = re.sub(fr'((?<=[\W\d])|^){orig}(?=[\W\d]|$)', ap, u)
        if fmt == 'latex':
            u = re.sub(r'![EU]([^!]*)!N', r'^{\1}', u)  # IDL to exponent
        return u



[docs]
    def toQuantity(self, copy=True):
        """Convert to Astropy Quantity

        Converts this array to an Astropy `~astropy.units.Quantity`.
        Invalid values are replaced with `~numpy.nan`.

        Returns
        -------
        `~astropy.units.Quantity`
            Data from this array interpreted according to its ``UNITS``
            attribute.

        Other Parameters
        ----------------
        copy : `bool`, default ``True``
            Copy data to the Quantity. If ``False``, changes to the
            Quantity may affect the source array. In some cases a copy
            may be made even if ``False``.

        Notes
        -----
        .. versionadded:: 0.6.0

        Examples
        --------
        >>> import spacepy.datamodel
        # https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
        >>> data = spacepy.datamodel.fromCDF(
        ...     'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
        >>> q = data['Position'].toQuantity()
        >>> q.to('m')
        <Quantity [[-32833200. , -15531762. ,  -6449212. ],
                   [-32903586. , -15406271. ,  -6448704.5],
                   [-32967848. , -15277711. ,  -6446542.5],
                   ...,
                   [-20966128. ,   6941849.5,  -2896334.2],
                   [-21515586. ,   6858618. ,  -3026324. ],
                   [-22047328. ,   6783260.5,  -3153003.5]] m>
        """
        import astropy.units
        data = self.replace_invalid()  # makes copy
        if not numpy.isnan(data).any() and not copy:
            data = self[...]
        q = astropy.units.Quantity(data, self.units(fmt='astropy'), copy=False)
        return q



[docs]
    @classmethod
    def fromQuantity(cls, q, copy=True):
        """Convert from Astropy Quantity

        Converts an Astropy `~astropy.units.Quantity` to an ISTP array.
        `~numpy.nan` are replaced with fill.

        Parameters
        ----------
        q : `~astropy.units.Quantity`
            Quantity to convert

        Returns
        -------
        `ISTPArray`
            Array with attributes which can be inferred from input.
            This may not be fully ISTP-compliant.

        Other Parameters
        ----------------
        copy : `bool`, default ``True``
            Copy data from the Quantity. If ``False``, changes to the
            Quantity may affect this array. In some cases a copy
            may be made even if ``False``.

        Notes
        -----
        .. versionadded:: 0.6.0
        """
        import astropy.units
        fill = numpy.isnan(q.value)
        if fill.any():
            copy = True
        data = q.value.copy() if copy else q.value
        data[fill] = -1e31  # Quantities are always float
        s = q.unit.si
        # Force scientific notation, remove superfluous signs and zeros
        scale = re.sub(r'0*e\+?0*', 'e', f'{s.scale:#e}', count=1)
        # Unscaled formatter is deprecated, so strip the scale the hard way
        unscaled = (s / astropy.units.Unit(s.scale)).to_string()
        # and remove extra spaces around operators
        unscaled = re.sub(r'\s+([^\w])\s+', r'\1', unscaled)
        attrs = {
            'FILLVAL': -1e31,
            'SI_Conversion': f'{scale}>{unscaled}',
            'UNITS': q.unit.to_string(),
        }
        out = cls(data, attrs)
        return out





[docs]
class ISTPContainer(collections.abc.Mapping):
    """Mixin class for containers using ISTP metadata.

    Container types like `SpaceData` provide all these methods; they assume
    attributes of the container and the arrays it contains use the
    `ISTP metadata standard <https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html>`_
    and are unlikely to give good results if that is not the case.

    .. versionadded:: 0.5.0
    """
    attrs:  collections.abc.Mapping


[docs]
    def lineplot(self, vname, target=None):
        """Line plot of a value (array) from this container

        Parameters
        ----------
        vname : `str`
            The key into this container of the value to plot (i.e.,
            the name of the variable).

        target : `matplotlib.axes.Axes` or `matplotlib.figure.Figure`, optional
            Where to draw the plot. Default is to create a new figure with
            a single subplot. If ``Axes``, will draw into that subplot (and
            will not draw a legend or figure title); if ``Figure``, will
            make a single subplot (and not set figure title). Handled by
            `~.plot.utils.set_target`.

        Returns
        -------
        ax : `matplotlib.axes.Axes`
            The subplot on which the variable was plotted

        Notes
        -----
        .. versionadded:: 0.5.0
        """
        import spacepy.plot.utils
        v = self[vname]
        fig, ax = spacepy.plot.utils.set_target(target)
        x = self[v.attrs['DEPEND_0']]
        data = v.replace_invalid()
        labels = None
        if v.attrs.get('LABL_PTR_1'):
            labels = self[v.attrs['LABL_PTR_1']]
        deltas = self.get_deltas(vname)
        plot_kwargs = {}
        if len(data.shape) == 1:
            data = data[..., None]
        if deltas and len(deltas[0].shape) == 1:
            deltas = tuple([d[..., None] for d in deltas])
        for dim in range(data.shape[-1]):
            if labels is not None:
                plot_kwargs['label'] = labels[dim]
            if deltas:
                if len(deltas) == 1:
                    yerr = deltas[0][:, dim]
                else:
                    yerr = numpy.stack((deltas[0][:, dim], deltas[1][:, dim]))
                ax.errorbar(numpy.array(x), data[:, dim], yerr=yerr, **plot_kwargs)
            else:
                ax.plot(numpy.array(x), data[:, dim], **plot_kwargs)
        ylabel = v.attrs.get('LABLAXIS', '')
        u = v.units(fmt='latex')
        if u.strip():
            ylabel = '{}{}(${}$)'.format(ylabel, ' ' if ylabel else '', u)
        if ylabel:
            ax.set_ylabel(ylabel)
        if x.attrs.get('LABLAXIS'):
            ax.set_xlabel(x.attrs['LABLAXIS'])
        if labels is not None and target is not ax:
            ax.legend(loc='best')
        if target is None and v.attrs.get('CATDESC'):
            fig.suptitle(v.attrs['CATDESC'])
        spacepy.plot.utils.applySmartTimeTicks(ax, x)
        return ax



[docs]
    def main_vars(self):
        """Return names of the 'main' variables in this container.

        These are variables that are likely to be of direct interest, rather
        than dependencies and support data. They are chosen primarily by
        not being dependencies of other variables, but if the ``VAR_TYPE``
        attribute is present it must be ``data``.

        Returns
        -------
        `list` of `str`

        Notes
        -----
        .. versionadded:: 0.5.0
        """
        referenced = set()
        for k, v in self.items():
            referenced.update([v.attrs[a] for a in v.attrs
                               if a.startswith(('DEPEND_', 'LABL_PTR_', 'DELTA_'))])
        main = sorted(set(self).difference(referenced))
        if any(('VAR_TYPE' in v.attrs for v in self.values())):
            main = [m for m in main if self[m].attrs.get('VAR_TYPE', '') == 'data']
        return main



[docs]
    def plot(self, vnames=None, fig=None):
        """Plot one or more values (arrays) from this container

        Parameters
        ----------
        vnames : `list` of `str`, optional.
            The key into this container of the value(s) to plot (i.e.,
            the name of the variable). If not specified, plots all
            'main' variables which are not dependencies of others;
            see `main_vars`.

        fig : `matplotlib.figure.Figure`, optional
            Where to draw the plot. Default is to create a new figure. If
            given, subplots will be added to this figure (it should start
            empty).

        Returns
        -------
        fig : `matplotlib.figure.Figure`
            The figure on which the variables were plotted

        See Also
        --------
        lineplot : to line plot a single variable
        spectrogram : to make a spectrogram of a single variable

        Notes
        -----
        .. versionadded:: 0.5.0

        Examples
        --------

        >>> import spacepy.datamodel
        # https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
        >>> data = spacepy.datamodel.fromCDF(
        ...     'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
        >>> fig = data.plot(['FESA', 'Position'])
        >>> fig.show()  # if needed

        >>> import spacepy.pycdf
        # https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/hope/level2/spinaverage/
        >>> with spacepy.pycdf.CDF('rbspa_rel04_ect-hope-sci-L2SA_20140108_v6.1.0.cdf') as f:
        ...     data = f.copy()
        >>> fig = data.plot(['FESA', 'FPSA'])
        >>> fig.show()  # if needed

        >>> import spacepy.pycdf
        # https://spp-isois.sr.unh.edu/data_public/ISOIS/level2/
        >>> with spacepy.pycdf.CDF('psp_isois_l2-summary_20201130_v13.cdf') as f:
        ...     data = f.copy()
        >>> fig = data.plot(['A_H_Rate_TS', 'H_CountRate_ChanP_SP'])
        >>> fig.show()  # if needed
        """
        if fig is None:
            import matplotlib.pyplot
            fig = matplotlib.pyplot.figure()
        if isinstance(vnames, collections.abc.Hashable) and vnames in self:
            vnames = [vnames]
        if vnames is None:
            vnames = self.main_vars()
        n_plots = len(vnames)
        for i, k in enumerate(vnames):
            ax = fig.add_subplot(n_plots, 1, i + 1)
            if self[k].plot_as_line():
                self.lineplot(k, target=ax)
                h, l = ax.get_legend_handles_labels()
                if l:
                    ax.legend(h, l, loc='best')
            else:
                self.spectrogram(k, target=ax)
        return fig



[docs]
    def spectrogram(self, vname, target=None):
        """Spectrogram plot of a value (array) from this container

        Parameters
        ----------
        vname : `str`
            The key into this container of the value to plot (i.e.,
            the name of the variable).

        target : `matplotlib.axes.Axes` or `matplotlib.figure.Figure`, optional
            Where to draw the plot. Default is to create a new figure with
            a single subplot. If ``Axes``, will draw into that subplot (and
            will not set figure title); if ``Figure``, will make a single
            subplot (and not set figure title). Handled by
            `~.plot.utils.set_target`.

        Returns
        -------
        ax : `matplotlib.axes.Axes`
            The subplot on which the variable was plotted

        Notes
        -----
        .. versionadded:: 0.5.0
        """
        import matplotlib.cm
        import spacepy.plot.utils
        v = self[vname]
        fig, ax = spacepy.plot.utils.set_target(target)
        x = self[v.attrs['DEPEND_0']]
        data = v.replace_invalid()
        x = self[v.attrs['DEPEND_0']]
        y = self[v.attrs['DEPEND_1']]
        zlabel = v.attrs.get('LABLAXIS', '')
        u = v.units(fmt='latex')
        if u.strip():
            zlabel = '{}{}(${}$)'.format(
                zlabel, ' ' if zlabel else '', u)
        zlabel = zlabel if zlabel else None
        try:  # mpl >=3.7
            cmap = matplotlib.colormaps.get_cmap(None)
        except AttributeError:
            cmap = matplotlib.cm.get_cmap()
        cmap = copy.copy(cmap)
        if cmap(-1.)[:3] == cmap(0.)[:3]:  # Underflow to black if not specified
            cmap.set_under('k')
        # Fill to grey or white
        if cmap(numpy.nan)[:3] == cmap(0.)[:3] and cmap(numpy.nan)[-1] > 0.:
            cmap.set_bad((.5, .5, .5, 0.) if cmap(1.)[:3] == (1., 1., 1.)
                         else (1., 1., 1., 0.))
        ax = spacepy.plot.simpleSpectrogram(numpy.array(x), numpy.array(y), data, cbtitle=zlabel,
                                            ax=ax, zero_valid=True, cmap=cmap)
        ylabel = y.attrs.get('LABLAXIS', '')
        u = y.units(fmt='latex')
        if u.strip():
            ylabel = '{}{}(${}$)'.format(
                ylabel, ' ' if ylabel else '', u)
        if ylabel:
            ax.set_ylabel(ylabel)
        if x.attrs.get('LABLAXIS'):
            ax.set_xlabel(x.attrs['LABLAXIS'])
        if target is None and v.attrs.get('CATDESC'):
            fig.suptitle(v.attrs['CATDESC'])
        spacepy.plot.utils.applySmartTimeTicks(ax, x)
        return ax



[docs]
    def get_deltas(self, vname):
        """Return deltas for an array

        Returns ISTP delta values. These may be uncertainties or may
        be e.g. bin widths; interpretation is undefined.

        Invalid values are replaced with `~numpy.nan`.

        Parameters
        ----------
        vname : `str`
            The key into this container of the value to get delta
            (i.e.,  the name of the variable).

        Returns
        -------
        deltas : `tuple` of `~numpy.ndarray`
            Deltas for ``vname``. Empty if no deltas available;
            one-element if symmetric; two-element if not symmetric.

        Notes
        -----
        .. versionadded:: 0.5.0
        """
        v = self[vname]
        asymmetric_msg = 'Only one of DELTA_(MINUS|PLUS)_VAR specified.'
        if 'DELTA_PLUS_VAR' not in v.attrs:
            if 'DELTA_MINUS_VAR' in v.attrs:
                raise ValueError(asymmetric_msg)
            return ()
        elif 'DELTA_MINUS_VAR' not in v.attrs:
            raise ValueError(asymmetric_msg)
        dp = self[v.attrs['DELTA_PLUS_VAR']].replace_invalid()
        if v.attrs['DELTA_PLUS_VAR'] == v.attrs['DELTA_MINUS_VAR']:
            return(dp,)
        return(self[v.attrs['DELTA_MINUS_VAR']].replace_invalid(), dp)



[docs]
    def toDataFrame(self, vname=None, copy=True):
        """Convert to Pandas DataFrame

        Converts one variable (and its dependencies) to a Pandas
        `~pandas.DataFrame`. Invalid values are replaced with `~numpy.nan`.

        Parameters
        ----------
        vname : `str`, optional
            The key into this container of the value to convert
            (i.e.,  the name of the variable). Strongly recommended;
            if not specified, will try to find one using `main_vars`,
            and raise `ValueError` if there is more than one candidate.

        Returns
        -------
        `~pandas.DataFrame`
            Data from the array named by ``vname`` and its dependencies.

        Other Parameters
        ----------------
        copy : `bool`, default ``True``
            Copy data to the DataFrame. If ``False``, changes to the
            DataFrame may affect the source data. In some cases a copy
            may be made even if ``False``.

        Notes
        -----
        .. versionadded:: 0.6.0

        Examples
        --------
        >>> import spacepy.datamodel
        # https://rbsp-ect.newmexicoconsortium.org/data_pub/rbspa/ECT/level2/
        >>> data = spacepy.datamodel.fromCDF(
        ...     'rbspa_ect-elec-L2_20140115_v2.1.0.cdf')
        >>> df = data.toDataFrame('Position')
        >>> df.plot()
        """
        import pandas
        if vname is None:
            main_vars = self.main_vars()
            if len(main_vars) != 1:
                matches = ', '.join(main_vars) if main_vars else 'none'
                raise ValueError(
                    f'No variable specified; possible matches: {matches}.')
            vname = main_vars[0]
        a = self[vname].attrs
        data = self[vname].replace_invalid()  # makes copy
        if not numpy.isnan(data).any() and not copy:
            data = self[vname][...]
        if 'LABL_PTR_1' in a:
            columns = self[a['LABL_PTR_1']][...]
        else:
            columns = [a.get('FIELDNAM', vname)]
            if len(data.shape) > 1:
                columns *= data.shape[-1]
        df = pandas.DataFrame(
            data=data, index=self[a['DEPEND_0']][...],
            columns=columns, copy=False)
        return df



[docs]
    @classmethod
    def fromDataFrame(cls, df, copy=True):
        """Convert from Pandas DataFrame

        Converts a Pandas `~pandas.DataFrame` to ISTP-compliant type.
        `~numpy.nan` are replaced with fill.

        Parameters
        ----------
        df : `~pandas.DataFrame`
            Data frame to convert

        Returns
        -------
        `ISTPContainer`
            ISTP-compliant container representing the dataframe's data.
            This may not be fully ISTP-compliant; the minimium attributes
            required to represent the DataFrame are used.

        Other Parameters
        ----------------
        copy : `bool`, default ``True``
            Copy data from the DataFrame. If ``False``, changes to the
            output may affect the DataFrame. In some cases a copy
            may be made even if ``False``.

        Notes
        -----
        .. versionadded:: 0.6.0
        """
        output = cls()
        # assume can hold a dmarray, a CDF will convert it
        fill = numpy.isnan(df.values)
        if fill.any():
            copy = True
        data = df.values.copy() if copy else df.values
        vartype = {
            'f': 'float',
            'i': 'int',
            'U': 'char',
        }.get(data.dtype.kind, 'float')
        attrs = createISTPattrs('data', ndims=2, vartype=vartype)
        attrs.update({
            'DEPEND_1': 'ColumnNumbers',
            'DISPLAY_TYPE': 'time_series',
            'FIELDNAM': 'data',
            'LABL_PTR_1': 'Labels',
        })
        for k in ('CATDESC', 'LABLAXIS', 'SI_CONVERSION', 'UNITS',
                  'VALIDMIN', 'VALIDMAX'):
            del attrs[k]
        output['data'] = dmarray(data, attrs=attrs)
        output['data'][fill] = -1e31
        attrs = createISTPattrs('support_data', vartype='tt2000')
        attrs.update({
            'FIELDNAM': 'Epoch',
        })
        del attrs['CATDESC']
        output['Epoch'] = dmarray(df.index.to_pydatetime(), attrs=attrs)
        attrs = createISTPattrs('metadata', vartype='char', NRV=True)
        attrs.update({
            'FIELDNAM': 'Labels',
        })
        del attrs['CATDESC']
        output['Labels'] = dmarray(df.columns.values, attrs=attrs, dtype='U')
        attrs = createISTPattrs('support_data', vartype='int', NRV=True)
        attrs.update({
            'FIELDNAM': 'ColumnNumbers',
        })
        del attrs['CATDESC']
        output['ColumnNumbers'] = dmarray(numpy.arange(len(df.columns)),
                                          attrs=attrs)
        return output





[docs]
class dmarray(numpy.ndarray, MetaMixin, ISTPArray):
    """
    Container for data within a SpaceData object

    Although the format of attributes is not enforced, using ISTP metadata
    enables the use of methods from `ISTPArray`.

    .. versionchanged:: 0.7.0
       Operations on a ``dmarray`` which return a scalar value return a numpy
       :std:term:`array scalar`. Previously they would return the base
       Python type. Assignment to ``dtype`` and ``shape`` are also now
       supported; previously this raised :exc:`TypeError`.

    Raises
    ------
    NameError
        raised is the request name was not added to the allowed attributes list

    Examples
    --------
    >>> import spacepy.datamodel as datamodel
    >>> position = datamodel.dmarray([1,2,3], attrs={'coord_system':'GSM'})
    >>> position
    dmarray([1, 2, 3])
    >>> position.attrs
    {'coord_system': 'GSM'}a

    The dmarray, like a numpy ndarray, is versatile and can store
    any datatype; dmarrays are not just for arrays.

    >>> name = datamodel.dmarray('TestName')
    dmarray('TestName')

    To extract the string (or scalar quantity), use the tolist method

    >>> name.tolist()
    'TestName'

    See methods of `ISTPArray` if attributes are ISTP-compliant.
    """
    Allowed_Attributes = ['attrs']

    def __new__(cls, input_array, attrs=None, dtype=None):
        # Input array is an already formed ndarray instance
        # We first cast to be our class type
        if not dtype:
            obj = numpy.asarray(input_array).view(cls)
        else:
            obj = numpy.asarray(input_array).view(cls).astype(dtype)
        # add the new attribute to the created instance
        if attrs != None:
            obj.attrs = attrs
        else:
            obj.attrs = {}
        # Finally, return the newly created object:
        return obj

    def __array_finalize__(self, obj):
       # see InfoArray.__array_finalize__ for comments
        if obj is None:
            return
        for val in self.Allowed_Attributes:
            self.__setattr__(val, copy.deepcopy(getattr(obj, val, {})))

    def __array_wrap__(self, out_arr, context=None, return_scalar=None):
        #check for zero-dims (numpy bug means subclass behaviour isn't consistent with ndarray
        #this traps most of the bad behaviour ( std() and var() still problems)
        if return_scalar:
            return super().__array_wrap__(out_arr, context, True)[()]
        if return_scalar is False:
            return super().__array_wrap__(out_arr, context, False)
        # Pre numpy 2.0, so guess if scalar
        if out_arr.ndim:
            return super().__array_wrap__(out_arr, context)
        return super().__array_wrap__(out_arr, context)[()]

    def __reduce__(self):
        """This is called when pickling, see:
        http://www.mail-archive.com/numpy-discussion@scipy.org/msg02446.html
        for this particular example.
        Only the attributes in Allowed_Attributes can exist
        """
        object_state = list(numpy.ndarray.__reduce__(self))
        subclass_state = tuple([tuple([val, self.__getattribute__(val)]) for val in self.Allowed_Attributes])
        object_state[2] = (object_state[2], subclass_state)
        return tuple(object_state)

    def __setstate__(self, state):
        """Used for unpickling after __reduce__ the self.attrs is recovered from
        the way it was saved and reset.
        """
        nd_state, own_state = state
        numpy.ndarray.__setstate__(self, nd_state)
        for i, val in enumerate(own_state):
            if not val[0] in self.Allowed_Attributes: # this is attrs
                self.Allowed_Attributes.append(own_state[i][0])
            self.__setattr__(own_state[i][0], own_state[i][1])

    def __setattr__(self, name, value):
        """Make sure that .attrs is the only attribute that we are allowing
        dmarray_ne took 15.324803 s
        dmarray_eq took 15.665865 s
        dmarray_assert took 16.025478 s
        It looks like != is the fastest, but not by much over 10000000 __setattr__
        """
        # Several attrs special-cased because should NOT be pickled
        if name in ('Allowed_Attributes', 'dtype', 'meta', 'shape'):
            pass
        elif not name in self.Allowed_Attributes:
            raise TypeError("Only attribute listed in Allowed_Attributes can be set")
        super(dmarray, self).__setattr__(name, value)


[docs]
    def addAttribute(self, name, value=None):
        """Method to add an attribute to a dmarray
        equivalent to
        a = datamodel.dmarray([1,2,3])
        a.Allowed_Attributes = a.Allowed_Attributes + ['blabla']
        """
        if name in self.Allowed_Attributes:
            raise NameError('{0} is already an attribute cannot add again'.format(name))
        self.Allowed_Attributes.append(name)
        self.__setattr__(name, value)



[docs]
    def count(self, srchval):
        """
        Equivalent to count method on list

        """
        mask = self == srchval
        return int(mask.sum())

    def _saveAttrs(self):
        Allowed_Attributes = self.Allowed_Attributes
        backup = []
        for atr in Allowed_Attributes:
            backup.append((atr, dmcopy(self.__getattribute__(atr))))
        return backup

    @classmethod
    def _replaceAttrs(cls, arr, backup):
        for key, val in backup:
            if key != 'attrs':
                try:
                    arr.addAttribute(key)
                except NameError:
                    pass
            arr.__setattr__(key, val)
        return arr


[docs]
    @classmethod
    def append(cls, one, other):
        """
        append data to an existing dmarray
        """
        backup = one._saveAttrs()
        outarr = dmarray(numpy.append(one, other))
        return cls._replaceAttrs(outarr, backup)



[docs]
    @classmethod
    def vstack(cls, one, other):
        """
        vstack data to an existing dmarray
        """
        backup = one._saveAttrs()
        outarr = dmarray(numpy.vstack((one, other)))
        return cls._replaceAttrs(outarr, backup)



[docs]
    @classmethod
    def hstack(cls, one, other):
        """
        hstack data to an existing dmarray
        """
        backup = one._saveAttrs()
        outarr = dmarray(numpy.hstack((one, other)))
        return cls._replaceAttrs(outarr, backup)



[docs]
    @classmethod
    def dstack(cls, one, other):
        """
        dstack data to an existing dmarray
        """
        backup = one._saveAttrs()
        outarr = dmarray(numpy.dstack((one, other)))
        return cls._replaceAttrs(outarr, backup)



[docs]
    @classmethod
    def concatenate(cls, one, other, axis=0):
        """
        concatenate data to an existing dmarray
        """
        backup = one._saveAttrs()
        outarr = dmarray(numpy.concatenate((one, other), axis=axis))
        return cls._replaceAttrs(outarr, backup)




[docs]
def dmfilled(shape, fillval=0, dtype=None, order='C', attrs=None):
    """
    Return a new dmarray of given shape and type, filled with a specified value (default=0).

    See Also
    --------
    numpy.ones

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> dm.dmfilled(5, attrs={'units': 'nT'})
    dmarray([ 0.,  0.,  0.,  0.,  0.])

    >>> dm.dmfilled((5,), fillval=1, dtype=np.int)
    dmarray([1, 1, 1, 1, 1])

    >>> dm.dmfilled((2, 1), fillval=np.nan)
    dmarray([[ nan],
           [ nan]])

    >>> a = dm.dmfilled((2, 1), np.nan, attrs={'units': 'nT'})
    >>> a
    dmarray([[ nan],
           [ nan]])
    >>> a.attrs
    {'units': 'nT'}
    """
    a = dmarray(numpy.empty(shape, dtype, order), attrs=attrs)
    a.fill(fillval)
    return a




[docs]
class SpaceData(dict, MetaMixin, ISTPContainer):
    """
    Datamodel class extending dict by adding attributes.

    Although the format of attributes is not enforced, using ISTP metadata
    enables the use of methods from `ISTPContainer`.
    """
    def __getitem__(self, key):
        """
        This allows one to make a SpaceData indexed with an iterable of
        keys to return a new spacedata made of the subset of keys
        """
        try:
            return super(SpaceData, self).__getitem__(key)
        except (KeyError, TypeError):
            if isinstance(key, (tuple, list)):
                # make a new SpaceData from these keys
                out = SpaceData()
                out.attrs = self.attrs
                for k in key:
                    out[k] = self[k]
                return out
            else:
                raise KeyError('{0}'.format(key))


[docs]
    def __init__(self, *args, **kwargs):
        """
        Base class for "Data Model" representation data

        Abstract method, reimplement

        Attributes
        ----------
        attrs : dict
            dictionary of the attributes of the SpaceData object

        """
        #raise(ValueError("Abstract method called, reimplement __init__"))
        self.attrs = {}
        if 'attrs' in kwargs:
            if hasattr(kwargs['attrs'], '__getitem__'):
                self.attrs = kwargs['attrs']
            del kwargs['attrs']

        super(SpaceData, self).__init__(*args, **kwargs)
        self.toCDF = partial(toCDF, SDobject=self)
        self.toCDF.__doc__ = toCDF.__doc__
        self.toHDF5 = partial(toHDF5, SDobject=self)
        self.toHDF5.__doc__ = toHDF5.__doc__
        self.toJSONheadedASCII = partial(toJSONheadedASCII, insd=self)
        self.toJSONheadedASCII.__doc__ = toJSONheadedASCII.__doc__


## To enable string output of repr, instead of just printing, uncomment his block
#    def __repr__(self):
#        #redirect stdout to StringIO
#        import io, sys
#        dum = io.StringIO()
#        sys_stdout_save = sys.stdout
#        sys.stdout = dum
#        self.tree(verbose=True)
#        sys.stdout = sys_stdout_save
#        dum.seek(0)
#        return ''.join(dum.readlines())


[docs]
    def tree(self, **kwargs):
        '''Print the contents of the SpaceData object in a visual tree

        Other Parameters
        ----------------
        verbose : bool, default False
            print more info
        spaces : str (optional)
            string will added for every line
        levels : int (optional)
            number of levels to recurse through (True, the default,  means all)
        attrs : bool, default False
            display information for attributes
        print_out : bool, default True

                .. versionadded:: 0.5.0

            Print output (original behavior); if ``False``, return the output.

        Examples
        --------
        >>> import spacepy.datamodel as dm
        >>> import spacepy.toolbox as tb
        >>> a = dm.SpaceData()
        >>> a['1'] = dm.SpaceData(dog = 5)
        >>> a['4'] = dm.SpaceData(cat = 'kitty')
        >>> a['5'] = 4
        >>> a.tree()
        +
        |____1
             |____dog
        |____4
             |____cat
        |____5

        See Also
        --------
        :class:`~spacepy.toolbox.dictree`
        '''
        from . import toolbox
        return toolbox.dictree(self, **kwargs)



[docs]
    def flatten(self):
        '''
        Method to collapse datamodel to one level deep

        Examples
        --------
        >>> import spacepy.datamodel as dm
        >>> import spacepy.toolbox as tb
        >>> a = dm.SpaceData()
        >>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
        >>> a['4'] = dm.SpaceData(cat = 'kitty')
        >>> a['5'] = 4
        >>> a.tree()
        +
        |____1
             |____dog
             |____pig
                  |____fish
                       |____a
                       |____b
        |____4
             |____cat
        |____5

        >>> b = dm.flatten(a)
        >>> b.tree()
        +
        |____1<--dog
        |____1<--pig<--fish<--a
        |____1<--pig<--fish<--b
        |____4<--cat
        |____5

        >>> a.flatten()
        >>> a.tree()
        +
        |____1<--dog
        |____1<--pig<--fish<--a
        |____1<--pig<--fish<--b
        |____4<--cat
        |____5

        '''
        flatobj = flatten(self)
        remkeys = [key for key in self]
        for key in remkeys:
            del self[key]
        for key in flatobj:
            self[key] = copy.copy(flatobj[key])


    # Stubs of partialed-in functions for docs; actual versions populated
    # when class instantiated


[docs]
    def toCDF(fname, **kwargs):
        """Create CDF file from this SpaceData.

        See `toCDF`; this object is provided for ``SDobject``."""



[docs]
    def toHDF5(fname, **kwargs):
        """Create HDF5 file from this SpaceData.

        See `toHDF5`; this object is provided for ``SDObject``. """



[docs]
    def toJSONheadedASCII(fname, **kwargs):
        """Create JSON-headed ASCII file from this SpaceData.

        See `toJSONheadedASCII`; this object is provided for ``insd``."""





[docs]
def convertKeysToStr(SDobject):
    if isinstance(SDobject, SpaceData):
        newSDobject = SpaceData()
        newSDobject.attrs = SDobject.attrs
    else:
        newSDobject = {}
    for key in SDobject:
        if not isinstance(key, str_classes):
            if isinstance(SDobject[key], dict):
                newSDobject[str(key)] = convertKeysToStr(SDobject[key])
            else:
                newSDobject[str(key)] = SDobject[key]
        else:
            if isinstance(SDobject[key], dict):
                newSDobject[key] = convertKeysToStr(SDobject[key])
            else:
                newSDobject[key] = SDobject[key]

    return newSDobject




[docs]
def flatten(dobj):
    '''Collapse datamodel to one level deep

    Examples
    --------

    >>> import spacepy.datamodel as dm
    >>> import spacepy.toolbox as tb
    >>> a = dm.SpaceData()
    >>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
    >>> a['4'] = dm.SpaceData(cat = 'kitty')
    >>> a['5'] = 4
    >>> a.tree()
    +
    |____1
         |____dog
         |____pig
              |____fish
                   |____a
                   |____b
    |____4
         |____cat
    |____5

    >>> b = dm.flatten(a)
    >>> b.tree()
    +
    |____1<--dog
    |____1<--pig<--fish<--a
    |____1<--pig<--fish<--b
    |____4<--cat
    |____5

    >>> a.flatten()
    >>> a.tree()
    +
    |____1<--dog
    |____1<--pig<--fish<--a
    |____1<--pig<--fish<--b
    |____4<--cat
    |____5


    See Also
    --------
    unflatten
    SpaceData.flatten

    '''

    try:
        addme = dobj.__class__()
    except TypeError:
        addme = SpaceData()
    remlist = []
    for key in dobj: #iterate over keys in SpaceData
        if isinstance(dobj[key], dict):
            remlist.append(key)
            newname = str(key) + '<--'
            for levkey in dobj[key]:
                if hasattr(dobj[key][levkey], 'keys'):
                    retdict = flatten(dobj[key][levkey])
                    for key2 in retdict:
                        addme[newname+levkey+'<--'+key2] = retdict[key2]
                else:
                    addme[newname+levkey] = copy.copy(dobj[key][levkey])
        else:
            addme[key] = copy.copy(dobj[key])
    return addme



[docs]
def unflatten(dobj, marker='<--'):
    '''Collapse datamodel to one level deep

    Examples
    --------

    >>> import spacepy.datamodel as dm
    >>> import spacepy.toolbox as tb
    >>> a = dm.SpaceData()
    >>> a['1'] = dm.SpaceData(dog = 5, pig = dm.SpaceData(fish=dm.SpaceData(a='carp', b='perch')))
    >>> a['4'] = dm.SpaceData(cat = 'kitty')
    >>> a['5'] = 4
    >>> a.tree()
    +
    |____1
         |____dog
         |____pig
              |____fish
                   |____a
                   |____b
    |____4
         |____cat
    |____5

    >>> b = dm.flatten(a)
    >>> b.tree()
    +
    |____1<--dog
    |____1<--pig<--fish<--a
    |____1<--pig<--fish<--b
    |____4<--cat
    |____5

    >>> c = dm.unflatten(b)
    >>> c.tree()
    +
    |____1
         |____dog
         |____pig
              |____fish
                   |____a
                   |____b
    |____4
         |____cat
    |____5


    '''
    #set up a new object for return
    try:
        addme = dobj.__class__()
    except TypeError:
        addme = SpaceData()
    #the input is assumed to be single level (i.e. it is flat)

    #find all keys that have at least one marker,
    #then unpack. Recurse over these until no more markers are found.
    keydict = {}
    for key in dobj:
        if isinstance(dobj[key], dict):
            raise TypeError('Flat datamodel should not contain dict-likes')
        try:
            if marker in key:
                #get 'group'
                group = key.split(marker)[0]
                if not group in keydict:
                    keydict[group] = {key: ''}
                else:
                    keydict[group][key] = ''
            else: #not nested, just copy key
                addme[key] = dmcopy(dobj[key])
        except:
            addme[key] = dmcopy(dobj[key])
    #now we have all the groups at this level
    #move members of groups into new SpaceDatas
    for grp in keydict:
        addme[grp] = SpaceData()
        for key in keydict[grp]:
            newkey = marker.join(key.split(marker)[1:])
            addme[grp][newkey] = dmcopy(dobj[key])
        # recurse to make sure everything inside is unpacked
        addme[grp] = unflatten(addme[grp], marker=marker)
    return addme




[docs]
def fromCDF(fname):
    '''
    Create a SpacePy datamodel representation of a NASA CDF file

    Parameters
    ----------
    file : string
        the name of the cdf file to be loaded into a datamodel

    Returns
    -------
    out : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> data = dm.fromCDF('test.cdf')

    See Also
    --------
    .pycdf.CDF.copy
    .pycdf.istp.VarBundle
    '''
    #TODO: add unflatten keyword and restore flattened variables
    try:
        from spacepy import pycdf
    except ImportError:
        raise ImportError("CDF converter requires NASA CDF library and SpacePy's pyCDF")

    with pycdf.CDF(fname) as cdfdata:
        return cdfdata.copy()



[docs]
def toCDF(fname, SDobject, skeleton='', flatten=False, overwrite=False,
          autoNRV=False, backward=None, TT2000=None, verbose=False):
    '''
    Create a CDF file from a SpacePy datamodel representation

    Parameters
    ----------
    fname : str
        Filename to write to

    SDobject : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Other Parameters
    ----------------
    skeleton : str (optional)
        create new CDF from a skeleton file (default '')

    flatten : bool (optional)
        flatten incoming datamodel - if SpaceData objects are nested (default False)

    overwrite : bool (optional)
        allow overwrite of an existing target file (default False)

    autoNRV : bool (optional)
        attempt automatic identification of non-record varying entries in CDF

    backward : bool (optional)
        ``False`` to create CDF in backward-compatible format; ``True``
         to force v3+ compatibility only. (Default: do not change current
         state, see :meth:`~.pycdf.Library.set_backward`).

         .. versionchanged:: 0.5.0
            Now supports specifying backward compatible or no change;
            previous versions always wrote v3+ CDFs (even if ``False``).

    TT2000 : bool (optional)
        Specify type for variables with names beginning 'Epoch'. Default
        CDF_EPOCH for backward-compatible CDF (``backward`` True) and
        CDF_TT20000 otherwise (``backward`` False or unspecified).

        .. versionchanged:: 0.5.0
           Current handling introduced.

        .. versionchanged:: 0.3.0
           Always write TT2000 variables (due to change in :mod:`~.pycdf`).

    verbose : bool (optional)
        verbosity flag

    Returns
    -------
    None

    Notes
    -----

    .. versionchanged:: 0.5.0
       Invalid keyword arguments now raise :exc:`TypeError` rather than being ignored.
    '''
    if flatten:
        SDobject = SDobject.flatten()
    if overwrite:
        raise NotImplementedError('Overwriting CDFs is not currently enabled '
                                  '- please remove the file manually')
    if TT2000 and backward:
        raise ValueError('Cannot use TT2000 in backward-compatible CDF.')
    try:
        from spacepy import pycdf
    except ImportError:
        raise ImportError("CDF converter requires NASA CDF library and"
                          " SpacePy's pyCDF")
    if backward is None:
        former_backward = None
    else:
        former_backward = pycdf.lib.set_backward(backward)
    force_epoch = not backward and TT2000 is False  # backward defaults falsey
    with pycdf.CDF(fname, skeleton) as outdata:
        if hasattr(SDobject, 'attrs'):
            for akey in SDobject.attrs:
                outdata.attrs[akey] = dmcopy(SDobject.attrs[akey])
        varLengths = [len(SDobject[var]) for var in SDobject]
        modeLength = next(itertools.groupby((reversed(sorted(varLengths)))))[0]
        for key, val in SDobject.items():
            if isinstance(val, dict):
                raise TypeError('This data structure appears to be nested,'
                                ' please try spacepy.datamodel.flatten')
            if not skeleton:
                if not val.shape:
                    shape_tup = -1
                else:
                    shape_tup = val.shape
                if 'Epoch' not in SDobject:
                    NRVtest = modeLength
                else:
                    NRVtest = len(SDobject['Epoch'])
                if shape_tup[0] != NRVtest: #naive check for 'should-be' NRV
                    try:
                        v = outdata.new(key, val[...], recVary=False)
                        if verbose:
                            print('{0} is being made NRV'.format(key))
                        v.attrs = dmcopy(val.attrs)
                    except ValueError:
                        v = outdata.new(key, val.tolist(), recVary=False)
                        v.attrs = dmcopy(val.attrs)
                if force_epoch and 'Epoch' in key:
                    outdata.new(key, val[...], type=pycdf.const.CDF_EPOCH)
                else:
                    try:
                        outdata[key] = val
                    except ValueError:
                        try:
                            outdata[key] = dmarray(
                                [val.tolist()], attrs=dmcopy(val.attrs)).squeeze()
                        except UnicodeEncodeError:
                            tmpAttrs = dmcopy(val.attrs)
                            for akey in tmpAttrs:
                                try: #strings
                                    if hasattr(tmpAttrs[akey], 'encode'):
                                        tmpAttrs[akey] = tmpAttrs[akey].encode('utf-8')
                                    else:
                                        tmpAttrs[akey] = tmpAttrs[akey]
                                except AttributeError: #probably a list of strings
                                    for id, el in enumerate(tmpAttrs[akey]):
                                        tmpAttrs[akey][id] = el.encode('utf-8')

            else:
                outdata[key][...] = val[...]
                for akey in outdata[key].attrs:
                    try:
                        outdata[key].attrs[akey] = dmcopy(val.attrs[akey])
                    except ValueError:
                        outdata[key][...] = dmarray([val.tolist()], attrs=dmcopy(val.attrs))
                    except KeyError:
                        pass
    if former_backward is not None:
        pycdf.lib.set_backward(former_backward)




[docs]
def fromHDF5(fname, **kwargs):
    '''
    Create a SpacePy datamodel representation of an HDF5 file or netCDF4 file which is HDF5 compliant

    Parameters
    ----------
    file : string
        the name of the HDF5/netCDF4 file to be loaded into a datamodel

    Returns
    -------
    out : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> data = dm.fromHDF5('test.hdf')

    Notes
    -----
    Zero-sized datasets will break in h5py. This is kluged by returning a
    dmarray containing a None.

    This function is expected to work with any HDF5-compliant files, including
    netCDF4 (not netCDF3) and MatLab save files from v7.3 or later, but some
    datatypes are not supported, e.g., non-string vlen datatypes, and will
    raise a warning.
    '''
    def hdfcarryattrs(SDobject, hfile, path):
        if hasattr(hfile[path], 'attrs'):
            #for key, value in hfile[path].attrs.iteritems():
            for key in hfile[path].attrs:
                try:
                    value = hfile[path].attrs[key]
                except TypeError:
                    warnings.warn('Unsupported datatype in dataset {}.attrs[{}]'.format(path, key), stacklevel=2)
                    continue
                try:
                    SDobject.attrs[key] = value
                except:
                    warnings.warn('The following key:value pair is not permitted\n' +
                                  'key = {0} ({1})\n'.format(key, type(key)) +
                                  'value = {0} ({1})'.format(value, type(value)),
                                  DMWarning, stacklevel=2)

    try:
        import h5py
    except ImportError:
        raise ImportError('HDF5 converter requires h5py')

    if isinstance(fname, str_classes):
        hfile = h5py.File(fname, mode='r')
    else:
        hfile = fname
        #should test here for HDF file object
    path = kwargs.get('path', '/')

    SDobject = SpaceData()
    allowed_elems = [h5py.Group, h5py.Dataset]
    ##carry over the attributes
    hdfcarryattrs(SDobject, hfile, path)
    ##carry over the groups and datasets
    for key, value in hfile[path].items():
        if isinstance(value, allowed_elems[0]):  # if a group
            SDobject[key] = fromHDF5(hfile, path=path+'/'+key)
        elif isinstance(value, allowed_elems[1]):  # if a dataset
            isuni = h5py.check_vlen_dtype(value.dtype) is str
            try:
                if isuni:
                    if hasattr(value, 'asstr'):  # h5py 3+
                        value = value.asstr()
                    value = numpy.require(value[...], dtype=str)
                SDobject[key] = dmarray(value)
            except (TypeError, ZeroDivisionError): #ZeroDivisionError catches zero-sized DataSets
                SDobject[key] = dmarray(None)
            hdfcarryattrs(SDobject[key], hfile, path+'/'+key)
    if path == '/':
        hfile.close()
    return SDobject



[docs]
def toHDF5(fname, SDobject, **kwargs):
    '''
    Create an HDF5 file from a SpacePy datamodel representation

    Parameters
    ----------
    fname : str
        Filename to write to

    SDobject : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Other Parameters
    ----------------
    overwrite : bool (optional)
        allow overwrite of an existing target file (default True)
    mode : str (optional)
        HDF5 file open mode (a, w, r) (default 'a')
    compression : str (optional)
        compress all non-scalar variables using this method (default None)
        (gzip, shuffle, fletcher32, szip, lzf)

        .. versionchanged:: 0.4.0
            No longer compresses scalars (which usually fails).

    compression_opts : str (optional)
        options to the compression, see h5py documentation for more details

    Returns
    -------
    None

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> a = dm.SpaceData()
    >>> a['data'] = dm.dmarray(range(100000), dtype=float)
    >>> dm.toHDF5('test_gzip.h5', a, overwrite=True, compression='gzip')
    >>> dm.toHDF5('test.h5', a, overwrite=True)
    >>> # test_gzip.h5 was 118k, test.h5 was 785k
    '''
    def SDcarryattrs(SDobject, hfile, path, allowed_attrs):
        if hasattr(SDobject, 'attrs'):
            for key, value in SDobject.attrs.items():
                dumval, dumkey = copy.copy(value), copy.copy(key)
                if isinstance(value, allowed_attrs):
                    #test for datetimes in iterables
                    if hasattr(value, '__iter__') and not isinstance(value, str_classes):
                        dumval = [b.isoformat() if isinstance(b, datetime.datetime) else b for b in value]
                    truth = False
                    try:
                        if value.nbytes: truth = True #empty arrays of any dimension are nbytes=0
                    except AttributeError: #not an array
                        if value or value == 0: truth = True

                    if truth:
                        uni = False #No special unicode handling
                        dumval = numpy.asanyarray(dumval)
                        if dumval.size and dumval.dtype.kind == 'U':
                            uni = True #Unicode list, special handling
                        try:
                            if uni:
                                #Tell hdf5 this is unicode. Numpy is UCS-4, HDF5 is UTF-8
                                hfile[path].attrs.create(
                                    dumkey, dumval, dtype=h5py.string_dtype(encoding='utf-8'))
                            else:
                                hfile[path].attrs[dumkey] = dumval
                        except TypeError:
                            hfile[path].attrs[dumkey] = str(dumval)
                            warnings.warn(
                                'The following value is not permitted\n' +
                                'key, value, type = {0}, {1}, {2})\n'.format(
                                    key, value, type(value)) +
                                'value has been converted to a string for output',
                                DMWarning, stacklevel=2)
                    else:
                        hfile[path].attrs[dumkey] = ''
                elif isinstance(value, datetime.datetime):
                    dumval = value.isoformat()
                    hfile[path].attrs[dumkey] = dumval
                else:
                    #TODO: add support for arrays(?) in attrs (convert to isoformat)
                    warnings.warn('The following key:value pair is not permitted\n' +
                                  'key = {0} ({1})\n'.format(key, type(key)) +
                                  'value type {0} is not in the allowed attribute list'.format(type(value)),
                                  DMWarning, stacklevel=2)

    try:
        import h5py
    except ImportError:
        raise ImportError('h5py is required to use HDF5 files')

    if not isinstance(SDobject, SpaceData):
        raise ValueError("Input data is not of type SpaceData, check usage:"
                         " toHDF5(fname, datamodel)")
    #mash these into a defaults dict...
    wr_mo = kwargs.get('mode', 'a')
    h5_compr_type = kwargs.get('compression', None)
    if h5_compr_type not in ['gzip', 'szip', 'lzf', 'shuffle', 'fletcher32', None]:
        raise NotImplementedError('Specified compression type not supported')
    h5_compr_opts = None if h5_compr_type == 'lzf'\
                    else kwargs.get('compression_opts', None)

    if 'overwrite' not in kwargs:
        kwargs['overwrite'] = True
    if isinstance(fname, str_classes):
        if os.path.isfile(fname):
            if kwargs['overwrite']:
                os.remove(fname)
            else:
                raise IOError('Cannot write HDF5, file exists (see overwrite) "{!s}"'.format(fname))
        hfile = h5py.File(fname, mode=wr_mo)
        must_close = True
    else:
        hfile = fname
        #should test here for HDF file object
        must_close = False
    path = kwargs.get('path', '/')

    allowed_attrs = [int, float, bytes, str, numpy.ndarray, list, tuple, numpy.bytes_]
    allowed_attrs.extend([
        numpy.dtype(v).type
        for v in itertools.chain(numpy.typecodes['AllInteger'], numpy.typecodes['AllFloat'])])
    allowed_attrs = tuple(allowed_attrs)

    allowed_elems = (SpaceData, dmarray)

    #first convert non-string keys to str
    SDobject = convertKeysToStr(SDobject)
    SDcarryattrs(SDobject, hfile, path, allowed_attrs)

    try:
        for key, value in SDobject.items():
            if isinstance(value, allowed_elems[0]):
                hfile[path].create_group(key)
                toHDF5(
                    hfile, value, path=path + '/' + key,
                    compression=h5_compr_type, compression_opts=h5_compr_opts)
            elif isinstance(value, allowed_elems[1]):
                comptype, compopts = (None, None) if value.shape == ()\
                                     else (h5_compr_type, h5_compr_opts)
                try:
                    hfile[path].create_dataset(key, data=value,
                                               compression=comptype, compression_opts=compopts)
                except:
                    dumval = numpy.asanyarray(value.copy())
                    dtype = None
                    if dumval.dtype.kind == 'U':
                        dumval = numpy.char.encode(dumval, 'utf-8')
                        dtype = h5py.string_dtype(encoding='utf-8')
                    elif isinstance(value[0], datetime.datetime):
                        for i, val in enumerate(value):
                            dumval[i] = val.isoformat()
                        dumval = dumval.astype('|S35')
                    else:
                        dumval = dumval.astype('|S35')
                    hfile[path].create_dataset(key, data=dumval, compression=comptype,
                                               compression_opts=compopts, dtype=dtype)
                    #else:
                    #    hfile[path].create_dataset(key, data=value.astype(float))
                SDcarryattrs(SDobject[key], hfile, path+'/'+key, allowed_attrs)
            else:
                warnings.warn('The following data is not being written as is not of an allowed type\n' +
                              'key = {0} ({1})\n'.format(key, type(key)) +
                              'value type {} is not in the allowed data type list'.format(
                                  type(value)), DMWarning, stacklevel=2)
    finally:
        if must_close:
            hfile.close()




[docs]
def fromNC3(fname):
    try:
        from scipy.io import netcdf as nc
    except ImportError:
        raise ImportError('SciPy is required to import netcdf3')

    ncfile = nc.netcdf_file(fname, mode='r', mmap=False)

    SDobject = SpaceData(attrs=dmcopy(ncfile._attributes))

    ##carry over the groups and datasets
    for key, value in ncfile.variables.items():
        #try:
            SDobject[key] = dmarray(dmcopy(value.data), attrs=dmcopy(value._attributes))
        #except (TypeError, ZeroDivisionError): #ZeroDivisionError catches zero-sized DataSets
        #    SDobject[key] = dmarray(None)
    ncfile.close()
    return SDobject





[docs]
def toHTML(fname, SDobject, attrs=(),
           varLinks=False, echo=False, tableTag='<table border="1">'):
    """
    Create an HTML dump of the structure of a spacedata

    Parameters
    ----------
    fname : str
        Filename to write to

    SDobject : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Other Parameters
    ----------------
    overwrite : bool (optional)
        allow overwrite of an existing target file (default True)
    mode : str (optional)
        HDF5 file open mode (a, w, r) (default 'a')
    echo : bool
        echo the html to the screen
    varLinks : bool
        make the variable name a link to a stub page

    """
    output = io.StringIO() # put the output into a StringIO
    keys = sorted(SDobject.keys())

    output.write(tableTag)
    output.write('\n')
    output.write('<tr><th>{0}</th>'.format('Variable'))
    for attr in attrs:
        output.write('<th>{0}</th>'.format(attr))
    output.write('</tr>')

    for ii, key in enumerate(keys):
        if ii % 2 == 0:
            output.write('<tr>')
        else:
            output.write('<tr class="alt">')
        output.write('<td>')
        if varLinks:
            output.write('<a href="{0}.html">'.format(key))
        output.write('{0}'.format(key))
        if varLinks:
            output.write('</a>')
        output.write('</td>')

        for attr in attrs:
            try:
                if not isinstance(SDobject[key].attrs[attr], str):
                    tmp = str(SDobject[key].attrs[attr])
                    output.write('<td>{0}</td>'.format(_idl2html(tmp)))
                else:
                    output.write('<td>{0}</td>'.format(_idl2html(SDobject[key].attrs[attr])))
            except KeyError:
                output.write('<td></td>')
        output.write('</tr>\n')
    output.write('</table>\n')
    with open(fname, 'w') as fp:
        fp.write(output.getvalue())
    if echo:
        print(output.getvalue())
    output.close()


def _idl2html(idl):
    """
    given an idl format string for text change it to html

    Parameters
    ==========
    idl : str
        idl formated string

    Returns
    =======
    out : str
        html formatted string
    """
    html = idl
    conv = {'!!': '!',
            '!E': '<sup>',
            '!I': '<sub>'}
    while True: # hate it but for now
        ind = html.find('!')
        if ind == -1:
            break
        code = html[ind:ind+2]
        html = html.replace(code, conv[code])
        if code == '!I':
            if '!N' in html:
                html = html.replace('!N', '</sub>', 1) # just replace 1
            else:
                html = html + '</sub>'
        elif code == '!E':
            if '!N' in html:
                html = html.replace('!N', '</sup>', 1) # just replace 1
            else:
                html = html + '</sup>'
    return html


[docs]
def readJSONMetadata(fname, **kwargs):
    '''Read JSON metadata from an ASCII data file

    Parameters
    ----------
    fname : str
        Filename to read metadata from

        .. versionchanged:: 0.5.0
                Filename can now be a .gz to indicate the file is gzipped

    Other Parameters
    ----------------
    verbose : bool (optional)
        set verbose output so metadata tree prints on read (default False)

    Returns
    -------
    mdata: spacepy.datamodel.SpaceData
        SpaceData with the metadata from the file
    '''
    if hasattr(fname, 'read'):
        lines = fname.read()
    else:
        if fname.endswith('.gz'):
            with gzip.open(filename=fname, mode='rt', encoding='latin=1') as gzh:
                lines = gzh.read()
        else:
            with open(fname, 'r') as f:
                lines = f.read()

    # isolate header
    p_srch = re.compile(r"^#(.*)$", re.M)
    hreg = re.findall(p_srch, lines)
    header = "".join(hreg)

    # isolate JSON field
    srch = re.search(r'\{\s*(.*)\s*\}', header)
    if isinstance(srch, type(None)):
        raise IOError(
            'The input file has no valid JSON header. Must be valid JSON bounded by braces "{ }".')
    js = srch.group(1)
    inx = js.rfind('end JSON')

    if inx == -1:
        js = ' '.join(('{', js, '}'))
        mdatadict = json.loads(js)
    else:
        js = ' '.join(('{', js[:inx]))
        mdatadict = json.loads(js)

    mdata = SpaceData()
    for key in mdatadict:
        if not hasattr(mdatadict[key], 'keys'): # not dict-like, must be global attrs
            mdata.attrs[key] = mdatadict[key]
        elif 'START_COLUMN' in mdatadict[key]: # is a variable
            mdata[key] = SpaceData(attrs=mdatadict[key])
        elif 'VALUES' in mdatadict[key]: # is global metadata
            dum = mdatadict[key].pop('VALUES')
            mdata[key] = dmarray(dum, attrs=mdatadict[key])
        else: # don't know how to deal with this, store as global attrs
            mdata.attrs[key] = mdatadict[key]

    if 'verbose' in kwargs:
        if kwargs['verbose']:
            mdata.tree(verbose=True, attrs=True)
    return mdata



[docs]
def readJSONheadedASCII(fname, mdata=None, comment='#', convert=False, restrict=None):
    """read JSON-headed ASCII data files into a SpacePy datamodel

    Parameters
    ----------
    fname : str or list
        Filename(s) to read data from

            .. versionchanged:: 0.5.0
                Filename can now be a .gz to indicate the file is gzipped

    Other Parameters
    ----------------
    mdata : spacepy.datamodel.SpaceData (optional)
        supply metadata object, otherwise is read from fname (default None)
    comment: str (optional)
        comment string in file to be read; lines starting with comment are
        ignored (default '#')
    convert: bool or dict-like (optional)
        If True, uses common names to try conversion from string. If a dict-
        like then uses the functions specified as the dict values to convert
        each element of 'key' to a non-string
    restrict: list of strings (optional)
        If present, restrict the variables stored to only those on this list

    Returns
    -------
    mdata: spacepy.datamodel.SpaceData
        SpaceData with the data and metadata from the file
    """
    import dateutil.parser as dup
    filelike = False
    if isinstance(fname, str_classes):
        fname = [fname]
    elif hasattr(fname, 'readlines'):
        fname = [fname]
        filelike = True
    if not mdata:
        mdata = readJSONMetadata(fname[0])
    if restrict:
        delkeys = [kk for kk in mdata.keys() if kk not in restrict]
        for val in delkeys:
            del mdata[val] #remove undesired keys
    mdata_copy = dmcopy(mdata)
    def innerloop(fh, mdata, mdata_copy):
        line = fh.readline()
        line = line.decode('latin1')
        while (line and line[0] == comment):
            line = fh.readline()
            line = line.decode('latin1')
        fh.seek(-len(line), os.SEEK_CUR) # fixes the missing first data bug
        alldata = fh.readlines()
        if not alldata:
            return mdata
        alldata = [d.decode('latin1') for d in alldata]
        ncols = len(alldata[0].rstrip().split())
        # fixes None in the data from empty lines at the end
        for row in range(len(alldata)): # reverse order
            if not alldata[-1].rstrip(): # blank line (or al white space)
                alldata.pop(-1)
            else:
                break
        nrows = len(alldata)
        data = numpy.empty((nrows, ncols), dtype=object)
        for ridx, line in enumerate(alldata):
            for cidx, el in enumerate(line.rstrip().split()):
                data[ridx, cidx] = el
        for key in mdata_copy.keys():
            if 'START_COLUMN' in mdata_copy[key].attrs:
                st = mdata_copy[key].attrs['START_COLUMN']
                if 'DIMENSION' in mdata_copy[key].attrs:
                    varDims = numpy.array(mdata_copy[key].attrs['DIMENSION'])
                    if not varDims.shape:
                        varDims = numpy.array([varDims])
                    singleDim = True
                    if len(varDims) > 1 or varDims[0] > 1:
                        singleDim = False
                if ('DIMENSION' in mdata_copy[key].attrs) and not singleDim:
                    en = int(mdata_copy[key].attrs['DIMENSION'][0]) + int(st)
                    try:
                        assert mdata[key] == {}
                        mdata[key] = data[:, int(st):int(en)]
                    except (AssertionError, ValueError):
                        mdata[key] = numpy.vstack((mdata[key], data[:, int(st):int(en)]))
                else:
                    try:
                        assert mdata[key] == {}
                        mdata[key] = data[:, int(st)]
                    except (AssertionError, ValueError):
                        mdata[key] = numpy.hstack((mdata[key], data[:, int(st)]))
        return mdata
    for fn in fname:
        if not filelike:
            if fn.endswith('.gz'):
                with gzip.open(filename=fn) as gzh:
                    mdata = innerloop(gzh, mdata, mdata_copy)
            else:
                with open(fn, 'rb') as fh: # fixes windows bug with seek()
                    mdata = innerloop(fh, mdata, mdata_copy)
        else:
            mdata = innerloop(fh, mdata, mdata_copy)
    #now add the attributres to the variables
    keys = list(mdata_copy.keys())
    for key in keys:
        if isinstance(mdata[key], SpaceData):
            mdata[key] = dmarray(None, attrs=mdata_copy[key].attrs)
        else:
            mdata[key] = dmarray(mdata[key], attrs=mdata_copy[key].attrs)

    if convert:
        if isinstance(convert, dict):
            conversions = convert
        else:
            conversions = {'DateTime': lambda x: dup.parse(x, ignoretz=True),
                           'ExtModel': str}
        for conkey in conversions:
            try:
                name = keys.pop(keys.index(conkey)) #remove from keylist
            except ValueError:
                warnings.warn('Key {} for conversion not found in file'.format(conkey),
                              UserWarning, stacklevel=2)
                continue
            for i, element in numpy.ndenumerate(mdata[name]):
                mdata[name][i] = conversions[name](element)

    for remkey in keys:
        try:
            mdata[remkey] = numpy.asanyarray(mdata[remkey], dtype=float)
        except ValueError:
            pass #this will skip any unspecified string fields
    return mdata



[docs]
def writeJSONMetadata(fname, insd, depend0=None, order=None, verbose=False, returnString=False):
    '''Scrape metadata from SpaceData object and make a JSON header

    Parameters
    ----------
    fname : str
        Filename to write to (can also use a file-like object)
        None can be given in conjunction with the returnString keyword to skip writing output

    insd : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Other Parameters
    ----------------
    depend0 : str (optional)
        variable name to use to indicate parameter on which other data depend (e.g. Time)
    order : list (optional)
        list of key names in order of start column in output JSON file
    verbose: bool (optional)
        verbose output
    returnString: bool (optional)
        return JSON header as string instead of returning None

    Returns
    -------
    None  (unless returnString keyword is True)
    '''
    js_out = {}

    def stripNL(text):
        out = text.group().replace('\n', '').replace('  ', '')
        return out

    #if required, identify depend0 for deciding what's data/metadata
    if depend0 is None:
        #search for DEPEND_0 in metadata
        for key in insd:
            if not hasattr(insd[key], 'attrs'):
                insd[key] = dmarray(insd[key])
            if 'DEPEND_0' in insd[key].attrs:
                depend0 = insd[key].attrs['DEPEND_0']
                if not isinstance(depend0, str_classes):
                    #assume it's a singleton list
                    depend0 = depend0[0]
                if not isinstance(depend0, str_classes):
                    depend0 = None #Failed to get a depend0
                else:
                    break #we're done here
        if depend0 is None:
            #fall back to most common var length
            tmp, keylist = [], list(insd.keys())
            for key in keylist:
                tmp.append(len(insd[key]))
            depend0 = keylist[tmp.index(numpy.bincount(tmp).argmax())]
            #TODO Set using Time, or Epoch, or similar...
    elif not depend0 in insd:
        raise KeyError('Invalid key supplied for ordering metadata on write')
    datalen = len(insd[depend0])

    #start with global attrs
    #TODO: check for datetime objs in attributes
    if insd.attrs:
        glattr = _dateToISO(insd.attrs)
        for key in glattr:
            js_out[key] = dmcopy(glattr[key])
            #TODO Mark these as global somehow (by omission of some metadata?)
            try:
                js_out[key] = js_out[key].tolist()
            except:
                pass
    #collect keys and put in order for output
    #TODO first check for extant START_COLUMN
    #then check dimensionality so that start column and dims can be added, if not present
    if hasattr(order, '__iter__'):
        keylist = order
        #now make sure that all missing keys are added to end
        for key in sorted(insd.keys()):
            if key not in order:
                keylist.append(key)
    else:
        ##TODO do we want to have DEPEND0 first in order by default?
        keylist = sorted(insd.keys())

    idx = 0
    for key in keylist:
        js_out[key] = dmcopy(_dateToISO(insd[key].attrs))
        if len(insd[key]) == datalen: #is data
            if verbose:
                print('data: {0}'.format(key))
            try:
                js_out[key]['DIMENSION'] = list(insd[key].shape[1:])
                if not js_out[key]['DIMENSION']:
                    js_out[key]['DIMENSION'] = [1]
                js_out[key]['START_COLUMN'] = idx
                dims = js_out[key]['DIMENSION']
                idx += int(dims[0])
                if len(dims) > 1:
                    l1 = 'The data cannot be properly represented in JSON-headed ASCII'\
                         ' as it has too high a rank\n'
                    l2 = 'key = {0} ({1})\n'.format(key, insd[key].shape)
                    l3 = 'Maximum allowed number of dimensions is 2\n'
                    warnings.warn(''.join([l1, l2, l3]), DMWarning, stacklevel=2)
            except AttributeError: #AttrErr if just metadata
                #js_out[key]['DIMENSION'] = insd[key].attrs['DIMENSION']
                pass
        else: #is metadata
            if verbose:
                print('metadata: {0}'.format(key))
            js_out[key]['VALUES'] = dmcopy(_dateToISO(insd[key]))
            js_out[key]['DIMENSION'] = [len(js_out[key]['VALUES'])]
        for kk in js_out[key]:
            try:
                js_out[key][kk] = js_out[key][kk].tolist()
            except:
                pass
    json_str = json.dumps(js_out, indent=4, sort_keys=True)
    reob = re.compile(r'\[.*?\]', re.DOTALL)
    json_str = re.sub(reob, stripNL, json_str) #put lists back onto one line
    #add comment field for header
    json_str = ''.join(['#', json_str])
    json_str = '\n#'.join(json_str.split('\n'))
    json_str = ''.join([json_str, '\n'])

    if isinstance(fname, str_classes):
        with open(fname, 'w') as fh:
            fh.writelines(json_str)
    elif hasattr(fname, 'writelines'):
        fname.writelines(json_str)
    elif (fname is None) and (returnString):
        return json_str

    if returnString:
        return json_str



def _dateToISO(indict):
    """
    covert datetimes to iso strings inside of datamodel attributes
    """
    retdict = dmcopy(indict)
    if isinstance(indict, dict):
        for key in indict:
            if isinstance(indict[key], datetime.datetime):
                retdict[key] = retdict[key].isoformat()
            elif hasattr(indict[key], '__iter__'):
                for idx, el in enumerate(indict[key]):
                    if isinstance(el, datetime.datetime):
                        retdict[key][idx] = el.isoformat()
    else:
        if isinstance(indict, datetime.datetime):
            retdict = retdict.isoformat()
        elif hasattr(indict, '__iter__'):
            retdict = numpy.asanyarray(retdict)
            for idx, el in numpy.ndenumerate(indict):
                if isinstance(el, datetime.datetime):
                    retdict[idx] = el.isoformat()
    return retdict



[docs]
def toJSONheadedASCII(fname, insd, metadata=None, depend0=None, order=None, **kwargs):
    '''Write JSON-headed ASCII file of data with metadata from SpaceData object

    Parameters
    ----------
    fname : str
        Filename to write to (can also use a file-like object)
        None can be given in conjunction with the returnString keyword to skip writing output

    insd : spacepy.datamodel.SpaceData
        SpaceData with associated attributes and variables in dmarrays

    Other Parameters
    ----------------
    depend0 : str (optional)
        variable name to use to indicate parameter on which other data depend (e.g. Time)
    order : list (optional)
        list of key names in order of start column in output JSON file
    metadata: str or file-like (optional)
        filename with JSON header to use (or file-like with JSON metadata)
    delimiter: str
        delimiter to use in ASCII output (default is whitespace), for tab, use '\t'

    Returns
    -------
    None

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> data = dm.SpaceData()
    >>> data.attrs['Global'] = 'A global attribute'
    >>> data['Var1'] = dm.dmarray([1,2,3,4,5], attrs={'Local1': 'A local attribute'})
    >>> data['Var2'] = dm.dmarray([[8,9],[9,1],[3,4],[8,9],[7,8]])
    >>> data['MVar'] = dm.dmarray([7.8], attrs={'Note': 'Metadata'})
    >>> dm.toJSONheadedASCII('outFile.txt', data, depend0='Var1', order=['Var1'])
    #Note that not all field names are required, those not given will be listed
    #alphabetically after those that are specified
    '''
    kwarg_dict = {'delimiter': ' '}
    for key in kwarg_dict:
        if key in kwargs:
            kwarg_dict[key] = kwargs[key]
    if not metadata:
        metadata = io.StringIO()
        writeJSONMetadata(metadata, insd, depend0=depend0, order=order)
        metadata.seek(0) #rewind StringIO object to start
    hdr = readJSONMetadata(metadata)

    datlist = []
    for key in hdr:
        if 'START_COLUMN' in hdr[key].attrs:
            #add to list of (start_col, keyname) pairs
            datlist.append((hdr[key].attrs['START_COLUMN'], key, hdr[key].attrs['DIMENSION'][0]))
            #also use for data length
            datlen = len(insd[key])
            if datlen == 0:
                raise ValueError('No data present to write: Use writeJSONmetadata')
                #TODO: Set this to just default to writing the header out and raise a warning
    datlist.sort()
    ncols = datlist[-1][0]+datlist[-1][-1]

    #now open file (file-like) and for each line in len(data)
    #write the line using start_column, name, dimension
    data = numpy.zeros([datlen, ncols], dtype=object)
    for stcol, name, dim in datlist:
        if dim == 1:
            data[:, stcol] = _dateToISO(insd[name])
        else:
            data[:, stcol:stcol+dim] = _dateToISO(insd[name])
    hdstr = writeJSONMetadata(None, hdr, depend0=depend0, order=order, returnString=True)
    with open(fname, 'w') as fh:
        fh.writelines(hdstr)
        for line in data:
            prline = kwarg_dict['delimiter'].join([str(el) for el in line])
            fh.write(''.join([prline, '\n']))




[docs]
def fromRecArray(recarr):
    '''Takes a numpy recarray and returns each field as a dmarray in a SpaceData container

    Parameters
    ----------
    recarr : numpy record array
        object to parse into SpaceData container

    Returns
    -------
    sd: spacepy.datamodel.SpaceData
        dict-like containing arrays of named records in recarr

    Examples
    --------
    >>> import numpy as np
    >>> import spacepy.datamodel as dm
    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
    >>> print(x, x.dtype)
    array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i4')])
    >>> sd = dm.fromRecArray(x)
    >>> sd.tree(verbose=1)
    +
    |____x (spacepy.datamodel.dmarray (2,))
    |____y (spacepy.datamodel.dmarray (2,))
    '''
    sd = SpaceData()
    for key in recarr.dtype.fields.keys():
        sd[key] = dmarray(recarr[key])
    return sd



[docs]
def toRecArray(sdo):
    '''Takes a SpaceData and creates a numpy recarray

    Parameters
    ----------
    sdo : SpaceData
        SpaceData to change to a numpy recarray

    Returns
    -------
    recarr: numpy record array
        numpy.recarray object with the same values (attributes are lost)

    Examples
    --------
    >>> import numpy as np
    >>> import spacepy.datamodel as dm
    >>> sd = dm.SpaceData()
    >>> sd['x'] = dm.dmarray([1.0, 2.0])
    >>> sd['y'] = dm.dmarray([2,4])
    >>> sd.tree(verbose=1)
    +
    |____x (spacepy.datamodel.dmarray (2,))
    |____y (spacepy.datamodel.dmarray (2,))
    >>> ra = dm.toRecArray(sd)
    >>> print(ra, ra.dtype)
    [(2, 1.0) (4, 2.0)] (numpy.record, [('y', '<i8'), ('x', '<f8')])
    '''
    nametype = numpy.dtype([(k, sdo[k].dtype.str) for k in sdo])
    recarr = numpy.rec.fromarrays([sdo[k] for k in sdo], dtype=nametype)
    return recarr




[docs]
def dmcopy(dobj):
    '''Generic copy utility to return a copy of a (datamodel) object

    Parameters
    ----------
    dobj : object
        object to return a copy of

    Returns
    -------
    copy_obj: object (same type as input)
        copy of input oibject

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> dat = dm.dmarray([2,3], attrs={'units': 'T'})
    >>> dat1 = dm.dmcopy(dat)
    >>> dat1.attrs['copy': True]
    >>> dat is dat1
    False
    >>> dat1.attrs
    {'copy': True, 'units': 'T'}
    >>> dat.attrs
    {'units': 'T'}
    '''
    if isinstance(dobj, (SpaceData, dmarray)):
        return copy.deepcopy(dobj)
    if isinstance(dobj, numpy.ndarray):
        return numpy.copy(dobj)
    return copy.copy(dobj)



[docs]
def createISTPattrs(datatype, ndims=1, vartype=None, units=' ', NRV=False):
    '''Return set of unpopulated attributes for ISTP compliant variable

    Parameters
    ----------
    datatype : {'data', 'support_data', 'metadata'}
        datatype of variable to create metadata for.
    ndims : int
        number of dimensions, default=1
    vartype : {'float', 'char', 'int', 'epoch', 'tt2000'}
        The type of the variable, default=float
    units : str
        The units of the variable, default=' '
    NRV : bool
        Is the variable NRV (non-record varying), default=False

    Returns
    -------
    attrs : dict
        dictionary of attributes for the variable

    Examples
    --------
    >>> import spacepy.datamodel as dm
    >>> dm.createISTPattrs('data', ndims=2, vartype='float', units='MeV')
    {'CATDESC': '',
     'DISPLAY_TYPE': 'spectrogram',
     'FIELDNAM': '',
     'FILLVAL': -1e+31,
     'FORMAT': 'F18.6',
     'LABLAXIS': '',
     'SI_CONVERSION': ' > ',
     'UNITS': 'MeV',
     'VALIDMIN': '',
     'VALIDMAX': '',
     'VAR_TYPE': 'data',
     'DEPEND_0': 'Epoch',
     'DEPEND_1': ''}
    '''
    fillvals = {'float': -1e31,
                'char': '',
                'int': numpy.array(-2147483648).astype(numpy.int32),
                'epoch': -1.0E31, #datetime.datetime(9999,12,31,23,59,59,999)}
                'tt2000': numpy.array(-9223372036854775808).astype(numpy.int64)}
    formats = {'float': 'F18.6',
               'char': 'A30',
               'int': 'I11',
               'epoch': '',
               'tt2000': 'I21'}
    disp = {1: 'time_series',
            2: 'spectrogram',
            3: 'spectrogram',
            4: 'spectrogram'}
    if vartype not in fillvals:
        fill = -1e31
        form = 'F15.6'
    else:
        fill = fillvals[vartype]
        form = formats[vartype]

    unit = units

    if datatype == 'data':
        attrs = {
            'CATDESC': '',
            'DISPLAY_TYPE': disp[ndims],
            'FIELDNAM': '',
            'FILLVAL': fill,
            'FORMAT': form,
            'LABLAXIS': '',
            'SI_CONVERSION': ' > ',
            'UNITS': unit,
            'VALIDMIN': '',
            'VALIDMAX': '',
            'VAR_TYPE': 'data'
            }
        for dim in range(ndims):
            attrs['DEPEND_{0}'.format(dim)] = ''
        attrs['DEPEND_0'] = 'Epoch'
    elif datatype == 'support_data':
        attrs = {
            'CATDESC': '',
            'FIELDNAM': '',
            'FORMAT': form,
            'UNITS': unit,
            'VAR_TYPE': 'support_data'
            }
        for dim in range(ndims):
            attrs['DEPEND_{0}'.format(dim)] = ''
        if not NRV:
            attrs['VALIDMIN'] = ''
            attrs['VALIDMAX'] = ''
            attrs['FILLVAL'] = fill
            attrs['DEPEND_0'] = 'Epoch'
        else:
            del attrs['DEPEND_0']
    elif datatype == 'metadata':
        attrs = {
            'CATDESC': '',
            'FIELDNAM': '',
            'FORMAT': form,
            'UNITS': unit,
            'VAR_TYPE': 'metadata'
            }
        for dim in range(ndims):
            attrs['DEPEND_{0}'.format(dim)] = ''
        if not NRV:
            attrs['FILLVAL'] = fill
            attrs['DEPEND_0'] = 'Epoch'
        else:
            del attrs['DEPEND_0']
    else:
        raise ValueError("Invalid datatype (data|support_data|metadata)")

    return attrs



def _getVarLengths(data):
    """
    get the length of all the variables

    Parameters
    ----------
    data : SpaceData
        SpaceData object to return the length of the variables

    Returns
    -------
    data : dict
        dict of the names and lengths of a SpaceData
    """
    ans = {}
    for k, v in data.items():
        ans[k] = len(v)
    return ans


[docs]
def resample(data, time=None, winsize=0, overlap=0, st_time=None, outtimename='Epoch'):
    """
    resample a SpaceData to a new time interval

    Parameters
    ----------
    data : SpaceData or dmarray
        SpaceData with data to resample or dmarray with data to resample,
        variables can only be 1d or 2d, if time is specified only variables
        the same length as time are resampled, otherwise only variables
        with length equal to the longest length are resampled

    time : array-like
        dmarray of times the correspond to the data

    winsize : datetime.timedelta
        Time frame to average the data over

    overlap : datetime.timedelta
        Overlap in the moving average

    st_time : datetime.datetime
        Starting time for the resample, if not specified the time of the first
        data point is used (see spacepy.toolbox.windowMean)

    Returns
    -------
    ans : SpaceData
        Resampled data, included keys are in the input keys (with the data caveats above)
        and Epoch which contains the output time

    Examples
    --------
    >>> import datetime
    >>> import spacepy.datamodel as dm
    >>> a = dm.SpaceData()
    >>> a.attrs['foo'] = 'bar'
    >>> a['a'] = dm.dmarray(range(10*2)).reshape(10,2)
    >>> a['b'] = dm.dmarray(range(10)) + 4
    >>> a['c'] = dm.dmarray(range(3)) + 10
    >>> times = [datetime.datetime(2010, 1, 1) + datetime.timedelta(hours=i) for i in range(10)]
    >>> out = dm.resample(a, times, winsize=datetime.timedelta(hours=2), overlap=datetime.timedelta(hours=0))
    >>> out.tree(verbose=1, attrs=1)
    # +
    # :|____foo (str [3])
    # |____Epoch (spacepy.datamodel.dmarray (4,))
    # |____a (spacepy.datamodel.dmarray (4, 2))
    # :|____DEPEND_0 (str [5])
    #
    # Things to note:
    #    - attributes are preserved
    #    - the output variables have their DEPEND_0 changed to Epoch (or outtimename)
    #    - each dimension of a 2d array is resampled individually
    """
    from . import toolbox
    # check for SpaceData or dmarray input before going to a bunch of work
    if not isinstance(data, (SpaceData, dmarray)):
        raise TypeError('Input must be a SpaceData or dmarray object')
    if time is None:
        time = []
    # can only resample variables that have the same length as time,
    #    if time is default then use all the vals that are the same
    #    as the longest var
    lent = len(time)
    if lent == 0:
        lent = len(data[max(data, key=lambda k: len(data[k]))])
    keys = [k for k in data if len(data[k]) == lent]
    # what time are we starting at?
    try:
        t_int = time.UTC
    except AttributeError:
        t_int = dmarray(time)
    if t_int.any() and ((st_time is None) and isinstance(t_int[0], datetime.datetime)):
        st_time = t_int[0].replace(hour=0, minute=0, second=0, microsecond=0)

    ans = SpaceData()
    ans.attrs = data.attrs

    for k in keys:
        if len(data[k].shape) > 1:
            if len(data[k].shape) > 2:
                raise IndexError("Variables can only be 1d or 2d")
            for i in range(data[k].shape[1]):
                d, t = toolbox.windowMean(data[k][:, i], time=t_int, winsize=winsize,
                                          overlap=overlap, st_time=st_time)
                if k not in ans:
                    ans[k] = dmarray(d)
                else:
                    ans[k] = dmarray.vstack(ans[k], d)
            ans[k] = ans[k].T
        else:
            d, t = toolbox.windowMean(data[k], time=t_int, winsize=winsize,
                                      overlap=overlap, st_time=st_time)
            ans[k] = dmarray(d)
        try:
            ans[k].attrs = data[k].attrs
        except AttributeError: # was not a dmarray
            pass
        ans[k].attrs['DEPEND_0'] = outtimename
    ans[outtimename] = dmarray(t)

    return ans