Source code for spacepy.poppy

#!/usr/bin/python
# -*- coding: utf-8 -*-

"""PoPPy -- Point Processes in Python.

This module contains point process class types and a variety of functions for
association analysis. The routines given here grew from work presented by
Morley and Freeman (Geophysical Research Letters, 34, L08104, doi:10.1029/
2006GL028891, 2007), which were originally written in IDL. This module is
intended for application to discrete time series of events to assess
statistical association between the series and to calculate confidence limits.
Any mis-application or mis-interpretation by the user is the user's own fault.

>>> import datetime as dt
>>> import spacepy.time as spt

Since association analysis is rather computationally expensive, this example
shows timing.

>>> t0 = dt.datetime.now()
>>> onsets = spt.Ticktock(onset_epochs, 'CDF')
>>> ticksR1 = spt.Ticktock(tr_list, 'CDF')

Each instance must be initialized

>>> lags = [dt.timedelta(minutes=n) for n in range(-400,401,2)]
>>> halfwindow = dt.timedelta(minutes=10)
>>> pp1 = poppy.PPro(onsets.UTC, ticksR1.UTC, lags, halfwindow)

To perform association analysis

>>> pp1.assoc()
Starting association analysis
calculating association for series of length [3494, 1323] at 401 lags
>>> t1 = dt.datetime.now()
>>> print("Elapsed:  " + str(t1-t0))
Elapsed:  0:35:46.927138

Note that for calculating associations between long series at a large number of
lags is SLOW!!

To plot

>>> pp1.plot(dpi=80)
Error: No confidence intervals to plot - skipping

To add 95% confidence limits (using 4000 bootstrap samples)

>>> pp1.aa_ci(95, n_boots=4000)

The plot method will then add the 95% confidence intervals as a semi-
transparent patch.


Authors: Steve Morley and Jon Niehof
Institution: Los Alamos National Laboratory
Contact: smorley@lanl.gov, jniehof@lanl.gov

Copyright 2010 Los Alamos National Security, LLC.
"""

import bisect
import ctypes
import sys
import warnings
import datetime as dt

import numpy as np

from spacepy import help
from spacepy import lib
import spacepy.toolbox as tb
import spacepy.datamodel as dm

__contact__ = 'Steve Morley, smorley@lanl.gov'

[docs] class PPro(object): """PoPPy point process object Initialize object with series1 and series2. These should be timeseries of events, given as lists, arrays, or lists of datetime objects. Includes method to perform association analysis of input series Output can be nicely plotted with :py:meth:`plot`. Contains the upper and lower confidence limits for the association number as a function of lag. The first element is the array of lower limits; the second, the array of upper limits. Not available until after calling :meth:`aa_ci`. Contains the confidence that the association number, as a function of lag, is above the asymptotic association number. (The confidence of being below is 100 - ``conf_above``.) Not available until after calling :meth:`aa_ci`. """ #NB: P2 is the "master" timescale, P1 gets shifted by lags #Add lag to p1 to reach p2's timescale, subtract lag from p2 to reach p1's ci = None """Upper and lower confidence limits for the association number""" conf_above = None """Confidence that the association number is above the asymptotic"""
[docs] def __init__(self, process1, process2, lags=None, winhalf=None, verbose=False): self.process1 = process1 self.process2 = process2 self.lags = lags self.winhalf = winhalf self.verbose = verbose
def __str__(self): """String Representation of PoPPy object""" try: pk = max(self.assoc_total) except AttributeError: pk = 'N/A' try: asy = self.asympt_assoc except: asy = 'N/A' # TODO this is broken not enough values for the format statement return """Point Process Object: Points in process #1 - %d ; Points in process #2 - %d Peak association number - %s ; Asymptotic association - %s """ % (len(self.process1), len(self.process2), pk, asy) __repr__ = __str__ def __len__(self): """Calling len(obj) will return the number of points in process 1""" return len(self.process1)
[docs] def swap(self): """Swaps process 1 and process 2""" self.process1, self.process2 = self.process2, self.process1 return None
[docs] def assoc(self, u=None, h=None): """Perform association analysis on input series Parameters ========== u : list, optional the time lags to use h : association window half-width, same type as process1 """ #check for existence of lags and winhalf if u is not None: self.lags = u if h is not None: self.winhalf = h if self.lags is None or self.winhalf is None: if self.verbose: print('assoc error: attributes lags and winhalf must be populated') return if self.verbose: print('calculating association for series of length %s at %d lags' % ([len(self.process1), len(self.process2)], len(self.lags))) import matplotlib as mpl import matplotlib.dates as mpd dtype = 'int' + str(ctypes.sizeof(ctypes.c_long) * 8 if lib.have_libspacepy else 64) ##Method 1 - use tb.tOverlap #create list for association number self.n_assoc = np.empty((len(self.lags), len(self.process1)), order='C', dtype=dtype) p2 = sorted(self.process2) p1 = sorted(self.process1) if lib.have_libspacepy == False: winhalf = self.winhalf lags = self.lags starts = [t - self.winhalf for t in p1] stops = [t + self.winhalf for t in p1] nss_list = list(range(len(self.process1))) for ilag in range(len(self.lags)): last_idx = [bisect.bisect_right(p2, stops[nss] + self.lags[ilag]) for nss in nss_list] first_idx = [bisect.bisect_left(p2, starts[nss] + self.lags[ilag]) for nss in nss_list] self.n_assoc[ilag, :] = [last_idx[i] - first_idx[i] for i in nss_list] else: def fracday(dd): '''turn a timedelta into a fractional day''' #NOTE: If 2.6 compatibility is ever dropped, can use dd.total_seconds()/86400 return dd.days + dd.seconds/86400.0 + dd.microseconds/(86400.0 * 10**6) #test for datetime input and try to convert to numeric if isinstance(p1[0], dt.datetime): p1 = [mpd.date2num(pp) for pp in p1] if isinstance(p2[0], dt.datetime): p2 = [mpd.date2num(pp) for pp in p2] #TODO: convert lags and winhalf from timedelta to fractional days if isinstance(self.lags[0], dt.timedelta): lags = [fracday(dd) for dd in self.lags] else: lags = self.lags if isinstance(self.winhalf, dt.timedelta): winhalf = fracday(self.winhalf) else: winhalf = self.winhalf p2 = (ctypes.c_double * len(p2))(*p2) p1 = (ctypes.c_double * len(p1))(*p1) #prctile_rank dies on some versions if this is a ctypes array lags = np.require(lags, dtype=ctypes.c_double, requirements='C') n_assoc = self.n_assoc.ctypes.data_as(lib.lptr) lib.assoc(p2, p1, lags.ctypes.data_as(lib.dptr), n_assoc, winhalf, len(p2), len(p1), len(self.lags)) left20perc = int(np.round((len(lags)*0.2))) right20perc = int(np.round((len(lags)*0.8))) self.assoc_total = np.sum(self.n_assoc, axis=1) valsL = self.assoc_total[:left20perc] valsR = self.assoc_total[right20perc:] self.asympt_assoc = np.mean([np.mean(valsL), np.mean(valsR)]) self.expected = np.empty([len(self.lags)], dtype='float64') for i in range(len(self.lags)): start_time = max([p1[0] + lags[i], p2[0]]) - winhalf stop_time = min([p1[-1] + lags[i], p2[-1]]) + winhalf if start_time != stop_time: n1 = bisect.bisect_right(p1, stop_time - lags[i]) - \ bisect.bisect_left(p1, start_time - lags[i]) n2 = bisect.bisect_right(p2, stop_time) - \ bisect.bisect_left(p2, start_time) self.expected[i] = 2.0 * n1 * n2 * winhalf / \ (stop_time - start_time) else: self.expected[i] = 0.0 return None
[docs] def assoc_mult(self, windows, inter=95, n_boots=1000, seed=None): """Association analysis w/confidence interval on multiple windows Using the time sequence and lags stored in this object, perform full association analysis, including bootstrapping of confidence intervals, for every listed window half-size Parameters ========== windows : sequence window half-size for each analysis inter : float, optional desired confidence interval, default 95 n_boots : int, optional number of bootstrap iterations, default 1000 seed : int, optional Random number generator seed. It is STRONGLY recommended not to specify (i.e. leave None) to permit multithreading. Warnings ======== This function is likely to take a LOT of time. Returns ======= out : three numpy array Three numpy arrays, (windows x lags), containing (in order) low values of confidence interval, high values of ci, percentage confidence above the asymptotic association number """ n_lags = len(self.lags) ci_low = np.empty([len(windows), n_lags]) ci_high = np.empty([len(windows), n_lags]) percentiles = np.empty([len(windows), n_lags]) for i in range(len(windows)): window = windows[i] self.assoc(h=window) self.aa_ci(inter, n_boots, seed) ci_low[i, :] = self.ci[0] ci_high[i, :] = self.ci[1] percentiles[i, :] = self.conf_above return (ci_low, ci_high, percentiles)
[docs] def plot_mult(self, windows, data, min=None, max=None, cbar_label=None, figsize=None, dpi=80, xlabel='Lag', ylabel='Window Size'): """Plots a 2D function of window size and lag Parameters ========== windows : list list of window sizes (y axis) data : list list of data, dimensioned (windows x lags) min : float, optional clip L{data} to this minimum value max : float, optional clip L{data} to this maximum value """ import matplotlib.pyplot as plt x = np.array(tb.bin_center_to_edges(self.lags)) y = np.array(tb.bin_center_to_edges(windows)) fig = plt.figure(figsize=figsize, dpi=dpi) ax0 = fig.add_subplot(111) cax = ax0.pcolormesh(x, y, data, vmin=min, vmax=max, shading='flat') ax0.set_xlim((x[0], x[-1])) ax0.set_ylim((y[0], y[-1])) plt.xlabel(xlabel) plt.ylabel(ylabel) if cbar_label is None: cbar_label = '{} confident above asymptotic association'.format( r'\%' if plt.rcParams['text.usetex'] else r'%') plt.colorbar(cax, fraction=0.05).set_label(cbar_label) return fig
[docs] def plot(self, figsize=None, dpi=80, asympt=True, show=True, norm=True, xlabel='Time lag', xscale=None, ylabel=None, title=None, transparent=True): """Create basic plot of association analysis. Uses object attributes created by :py:meth:`assoc` and, optionally, :py:meth:`aa_ci`. Parameters ========== figsize : , optional passed through to matplotlib.pyplot.figure dpi : int, optional passed through to matplotlib.pyplot.figure asympt : boolean, optional True to overplot the line of asymptotic association number show : boolean, optional Show the plot? (if false, will create without showing) norm : boolean, optional Normalize plot to the asymptotic association number title : string, optional label/title for the plot xlabel : string, optional label to put on the X axis of the resulting plot xscale : float, optional scale x-axis by this factor (e.g. 60.0 to convert seconds to minutes) ylabel : string, optional label to put on the Y axis of the resulting plot transparent : boolean, optional make c.i. patch transparent (default) """ try: dum = self.n_assoc except AttributeError: return 'Error: No association analysis results to plot' import datetime as dt import matplotlib as mpl import matplotlib.pyplot as plt fig = plt.figure(figsize=figsize, dpi=dpi) ax0 = fig.add_subplot(111) #fix such that self.lags (a timedelta) are converted to a time if type(self.lags[0]) == dt.timedelta: x = [i.seconds/60 + i.days*1440. for i in self.lags] else: x = self.lags if xscale != None: x = [i / xscale for i in x] ax0.set_xlim((min(x), max(x))) ci = self.ci if norm: if self.ci is not None: ci = [[j / self.asympt_assoc for j in self.ci[i]] for i in [0,1]] asympt_assoc = 1.0 assoc_total = [assoc / self.asympt_assoc for assoc in self.assoc_total] else: asympt_assoc = self.asympt_assoc assoc_total = self.assoc_total if ci is not None: if transparent: ax0.fill_between(x, ci[0], ci[1], edgecolor='none', facecolor='blue', alpha=0.5) else: ax0.fill_between(x, ci[0], ci[1], edgecolor='none', facecolor='#ABABFF') else: print('Error: No confidence intervals to plot - skipping') ax0.plot(x, assoc_total, 'b-', lw=1.0) if asympt: ax0.plot([x[0], x[-1]], [asympt_assoc]*2, 'r--', lw=1.0) if ylabel is None: if norm: plt.ylabel( 'Normalized Association Number n(u, h={0}) / n({1}, h={0})'.format( self.winhalf, r'$\\mathrm{u\rightarrow\infty}$' if plt.rcParams['text.usetex'] else 'u->Inf')) else: plt.ylabel('Association Number n(u, h={0})'.format( self.winhalf)) else: plt.ylabel(ylabel) plt.xlabel(xlabel) if title is not None: plt.title(title) if show: plt.show() return None return fig
[docs] def aa_ci(self, inter, n_boots=1000, seed=None): """Get bootstrap confidence intervals for association number Requires input of desired confidence interval, e.g.: >>> obj.aa_ci(95) Upper and lower confidence limits are added to :attr:`~PPro.ci`. After calling, :attr:`~PPro.conf_above` will contain the confidence (in percent) that the association number at that lag is *above* the asymptotic association number. (The confidence of being below is 100 - conf_above) For minor variations in conf_above to be meaningful, a *large* number of bootstraps is required. (Rougly, 1000 to be meaningful to the nearest percent; 10000 to be meaningful to a tenth of a percent.) A conf_above of 100 usually indicates an insufficient sample size to resolve, *not* perfect certainty. Note also that a 95% chance of being above indicates an exclusion from the *90%* confidence interval! Parameters ========== inter : float percentage confidence interval to calculate n_boots : int, optional number of bootstrap iterations to run seed : int, optional seed for the random number generator. If not specified, Python code will use numpy's RNG and its current seed; C code will seed from the clock. Warnings ======== If ``seed`` is specified, results may not be reproducible between systems with different sizes for C long type. Note that 64-bit Windows uses a 32-bit long and so results will be the same between 64 and 32-bit Windows, but not between 64-bit Windows and other 64-bit operating systems. If ``seed`` is not specified, results are not reproducible anyhow. """ lags = self.lags ci_low = np.empty([len(lags)]) ci_high = np.empty([len(lags)]) conf_above = np.empty([len(lags)]) long_size = ctypes.sizeof(ctypes.c_long) * 8 if seed is not None: np.random.seed(seed) # numpy random seeds must always be 32-bit seed_size = long_size if lib.have_libspacepy else 32 minseed = -2 ** (seed_size - 1) maxseed = 2 ** (seed_size - 1) - 1 #randint used to be system-size signed integer only. #so used that and cast to the required unsigned later #cast does not lose entropy: negative numbers map to high positives. #For reproducibility, keep doing that even though dtype #kwarg now available. lag_seeds = np.random.randint(minseed, maxseed, [len(lags)]) if not lib.have_libspacepy: lag_seeds = np.require(lag_seeds, np.int32) newtype = np.dtype('u' + str(lag_seeds.dtype)) lag_seeds = np.require(lag_seeds, dtype=newtype) if not lib.have_libspacepy: for i in range(len(lags)): if seed != None: np.random.seed(lag_seeds[i]) ci_low[i], ci_high[i], conf_above[i]= boots_ci( self.n_assoc[i, :], n_boots, inter, np.add.reduce, seed=None, target=self.asympt_assoc) else: perc_low = (100.-inter)/2. #set confidence interval perc_high = inter + perc_low dtype = 'int' + str(long_size) assoc_totals = np.empty([len(lags), n_boots], dtype=dtype, order='C') if seed is None: clock_seed = ctypes.c_int(1) lag_seeds = np.empty([len(lags)], dtype=dtype) else: clock_seed = ctypes.c_int(0) def thread_targ(start, size): lib.aa_ci( self.n_assoc[start:start+size].ctypes.data_as(lib.ulptr), assoc_totals[start:start+size].ctypes.data_as(lib.ulptr), size, len(self.process1), n_boots, lag_seeds[start:start+size].ctypes.data_as(lib.ulptr), clock_seed) tb.thread_job(len(lags), 0, thread_targ) for i in range(len(lags)): assoc_totals[i, :].sort() ci_low[i], ci_high[i] = np.percentile( assoc_totals[i, :], (perc_low,perc_high)) conf_above[i] = 100.0 - value_percentile(assoc_totals[i, :], self.asympt_assoc) self.ci = [ci_low, ci_high] self.conf_above = conf_above
#Functions outside class
[docs] def plot_two_ppro(pprodata, pproref, ratio=None, norm=False, title=None, xscale=None, figsize=None, dpi=80, ylim=[None, None], log=False, xticks=None, yticks=None): """Overplots two PPro objects Parameters ========== pprodata : PPro first point process to plot (in blue) pproref : PPro second process to plot (in red) ratio : float multiply L{pprodata} by this ratio before plotting, useful for comparing processes of different magnitude norm : boolean normalize everything to L{pproref}, i.e. the association number for L{pproref} will always plot as 1. title : string title to put on the plot xscale : float scale x-axis by this factor (e.g. 60.0 to convert seconds to minutes) figsize : passed through to matplotlib.pyplot.figure dpi : int passed through to matplotlib.pyplot.figure ylim : list [minimum, maximum] values of y for the axis log : bollean True for a log plot xticks : sequence or float if provided, a list of tickmarks for the X axis yticks : sequance or float if provided, a list of tickmarks for the Y axis """ import matplotlib.pyplot as plt if ratio is None: ratio = float(pproref.asympt_assoc) / pprodata.asympt_assoc lags = pproref.lags nlags = len(lags) assert lags[:] == pprodata.lags[:] if xscale is not None: lags = [float(i) / xscale for i in lags] fig = plt.figure(figsize=figsize, dpi=dpi) plt.subplots_adjust(wspace=0.0, hspace=0.0) ax0 = fig.add_subplot(111) ax0.set_xlim((lags[0], lags[-1])) if norm: scaleddata = [ratio * float(pprodata.assoc_total[i]) / pproref.assoc_total[i] for i in range(nlags)] scaledhi = [ratio * float(pprodata.ci[1][i]) / pproref.assoc_total[i] for i in range(nlags)] scaledlo = [ratio * float(pprodata.ci[0][i]) / pproref.assoc_total[i] for i in range(nlags)] scaledref = [1.0 for i in range(nlags)] refhi = [float(pproref.ci[1][i]) / pproref.assoc_total[i] for i in range(nlags)] reflo = [float(pproref.ci[0][i]) / pproref.assoc_total[i] for i in range(nlags)] else: scaleddata = [ratio * float(pprodata.assoc_total[i]) for i in range(nlags)] scaledhi = [ratio * float(pprodata.ci[1][i]) for i in range(nlags)] scaledlo = [ratio * float(pprodata.ci[0][i]) for i in range(nlags)] scaledref = pproref.assoc_total refhi = pproref.ci[1] reflo = pproref.ci[0] ax0.fill_between(lags, reflo, refhi, edgecolor='none', facecolor='#FFABAB', interpolate=True) ax0.fill_between(lags, scaledlo, scaledhi, edgecolor='none', facecolor='#ABABFF', interpolate=True) bottom = np.fromiter((max([scaledlo[i], reflo[i]]) for i in range(nlags)), np.float64, count=-1) top = np.fromiter((min([scaledhi[i], refhi[i]]) for i in range(nlags)), np.float64, count=-1) ax0.fill_between(lags, bottom, top, where=(bottom <= top), edgecolor='none', facecolor='#AB81D5', interpolate=True) ax0.plot(lags, scaleddata, lw=1.0) ax0.plot(lags, scaledref, 'r--', lw=1.0) ax0.set_ylim(bottom = 0 if ylim[0] is None else ylim[0]) if ylim[1] is not None: ax0.set_ylim(top=ylim[1]) if log: ax0.set_yscale('log', nonposy='clip') if xticks: ax0.set_xticks(xticks) if yticks: ax0.set_yticks(yticks) if title: plt.title(title)
[docs] def boots_ci(data, n, inter, func, seed=None, target=None, sample_size=None, usepy=False, nretvals=1): """Construct bootstrap confidence interval The bootstrap is a statistical tool that uses multiple samples derived from the original data (called surrogates) to estimate a parameter of the population from which the sample was drawn. This assumes that the sample is randomly drawn and hence is representative of the underlying distribution. The benefit of the bootstrap is that it is non-parametric and can be applied in situations where there is reasonable doubt about the characteristics of the underlying distribution. This routine uses the boot- strap for its most common application - the estimation of confidence intervals. Examples ======== >>> data, n = numpy.random.lognormal(mean=5.1, sigma=0.3, size=3000), 4000. >>> myfunc = lambda x: numpy.median(x) >>> ci_low, ci_high = poppy.boots_ci(data, n, 95, myfunc) >>> ci_low, numpy.median(data), ci_high (163.96354196633686, 165.2393331896551, 166.60491435416566) iter. 1 ... repeat (162.50379144492726, 164.15218265100233, 165.42840588032755) iter. 2 For comparison >>> data = numpy.random.lognormal(mean=5.1, sigma=0.3, size=90000) >>> numpy.median(data) 163.83888237895815 Note that the true value of the desired quantity may lie outside the 95% confidence interval one time in 20 realizations. This occurred for the first iteration here. For the lognormal distribution, the median is found exactly by taking the exponential of the "mean" parameter. Thus here, the theoretical median is 164.022 (6 s.f.) and this is well captured by the above bootstrap confidence interval. Parameters ========== data : array like data to bootstrap n : int number of surrogate series to select, i.e. number of bootstrap iterations. inter : numerical desired percentage confidence interval func : callable Function to apply to each surrogate series sample_size : int number of samples in the surrogate series, default length of L{data}. This will change the statistical properties of the bootstrap and should only be used for good reason! seed : int Optional seed for the random number generator. If not specified, numpy generator will not be reseeded; C generator will be seeded from the clock. target : same as data a 'target' value. If specified, will also calculate percentage confidence of being at or above this value. nretvals : int number of return values from input function Returns ======= out : sequence of float inter percent confidence interval on value derived from func applied to the population sampled by data. If target is specified, also the percentage confidence of being above that value. """ perc_low = (100.-inter)/2. #set confidence interval perc_high = inter + perc_low n_els = len(data) if n_els <= 2: return np.nan, np.nan if target is None else np.nan, np.nan, np.nan if sample_size is None: sample_size = n_els if not lib.have_libspacepy or usepy: surr_quan = np.empty([n, nretvals] if nretvals > 1 else [n]) if seed is not None: np.random.seed(seed) ran_el = np.random.randint(n_els, size=[n, sample_size]) for i in range(int(n)): #compute n bootstrapped series surr_ser = np.array([data[rec] for rec in ran_el[i, :]]) #resample w/ replace surr_quan[i] = func(surr_ser) #get desired quantity from surrogates if len(surr_quan.shape) == 1: surr_quan.sort() else: surr_quan = surr_quan[surr_quan.argsort(axis=0)[:,0]] else: n = int(n) data = (ctypes.c_double * n_els)(*data) surr_ser = (ctypes.c_double * (n * sample_size))() if seed is None: seed = 0 clock_seed = ctypes.c_int(1) else: clock_seed= ctypes.c_int(0) lib.boots(surr_ser, data, ctypes.c_ulong(n), ctypes.c_ulong(n_els), ctypes.c_ulong(sample_size), ctypes.c_ulong(seed), clock_seed) gen = [func(surr_ser[i * sample_size:(i + 1) * sample_size]) for i in range(n)] #Can't sort if more than one return value surr_quan = sorted(gen) if nretvals == 1 else np.stack(gen) #get confidence interval if nretvals>1: pul = np.empty((2, nretvals)) for nn in range(nretvals): pul[:,nn] = np.percentile(surr_quan[:,nn], (perc_low,perc_high)) else: pul = np.percentile(surr_quan, (perc_low,perc_high)) if target is None: return pul[0], pul[1] vp = value_percentile(surr_quan, target) return pul[0], pul[1], 100.0 - vp
[docs] def value_percentile(sequence, target): """Find the percentile of a particular value in a sequence Parameters ========== sequence : sequence a sequence of values, sorted in ascending order target : same type as sequence a target value Returns ======= out : float the percentile of target in sequence """ min = bisect.bisect_left(sequence, target) #first value >= x max = bisect.bisect_right(sequence, target, lo=min) #first value > x #min:max, in Python syntax, will include all values that are target count = len(sequence) if max == 0: #everything is bigger return 0.0 if min == count: #everything is smaller return 100.0 #Index 0 is 0th percentile (by definition); count-1 is 100th #so index n is (n * 100.0) / (count - 1) #find a 'virtual' index if min == max: #Not equal to any, between min-1 and min #interpolate between two points based on value. idx = min - 1 + float(target - sequence[min - 1]) / \ (sequence[min] - sequence[min - 1]) else: #Equal to one or more values (min...max-1), average them idx = (min + max - 1) / 2 return idx * 100.0 / (count - 1)
[docs] def applyRefractory(process1, period): '''Apply a refractory period to an input discrete event time sequence All events in the refractory period are removed from the point process. Parameters ========== process1 : iterable an iterable of datetimes, or a spacepy.time.Ticktock period : datetime.timedelta length of refractory period Returns ======= keep : iterable returns pruned set of datetimes with same type as input NOTE: array subclasses will be lost ''' import spacepy.time as spt if isinstance(process1, spt.Ticktock): #SpacePy Ticktock p1 = dm.dmcopy(process1.UTC).tolist() tickt = True elif isinstance(process1[0], dt.datetime): #iterable containing datetimes p1 = dm.dmcopy(process1) try: p1 = p1.tolist() wasArr = True except: wasArr = False tickt = False else: raise NotImplementedError('Input process must be a list/array of datetimes, or a spacepy.time.Ticktock') try: assert period.seconds except AssertionError: raise AttributeError('period must be a datetime.timedelta') done = len(p1)<2 keep, discard = [], [] while not done: t1 = p1[0] t2 = t1 + period inds = tb.tOverlapHalf([t1, t2], p1[1:]) for idx in inds: discard.append(p1.pop(idx + 1)) keep.append(p1.pop(0)) # put test element into keep array done = len(p1) < 2 if tickt: return spt.Ticktock(keep) return np.array(keep) if wasArr else keep