Source code for astroimtools.stats

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Statistics tools.
"""

import numpy as np
from astropy.nddata import NDData, support_nddata
from astropy.stats import (SigmaClip, biweight_location, biweight_midvariance,
                           mad_std)
from astropy.table import Table
from astropy.utils import lazyproperty
from astropy.utils.decorators import deprecated_renamed_argument

from .utils import mask_databounds

__all__ = ['minmax', 'NDDataStats', 'nddata_stats']


[docs] @support_nddata def minmax(data, mask=None, axis=None): """ Return the minimum and maximum values of an array (or along an array axis). Parameters ---------- data : array-like The input data. mask : array_like (bool), optional A boolean mask, with the same shape as ``data``, where a `True` value indicates the corresponding element of ``data`` is masked. axis : int, optional The axis along which to operate. By default, flattened input is used. Returns ------- min : scalar or `~numpy.ndarray` The minimum value of ``data``. If ``axis`` is `None`, the result is a scalar value. If ``axis`` is input, the result is an array of dimension ``data.ndim - 1``. max : scalar or `~numpy.ndarray` The maximum value of ``data``. If ``axis`` is `None`, the result is a scalar value. If ``axis`` is input, the result is an array of dimension ``data.ndim - 1``. Notes ----- This function is decorated with `~astropy.nddata.support_nddata` and thus supports `~astropy.nddata.NDData` objects as input. Examples -------- >>> import numpy as np >>> from astroimtools import minmax >>> np.random.seed(12345) >>> data = np.random.random((3, 3)) >>> minmax(data) # doctest: +FLOAT_CMP (0.18391881167709445, 0.9645145197356216) >>> mask = (data < 0.3) >>> minmax(data, mask=mask) # doctest: +FLOAT_CMP (0.3163755545817859, 0.9645145197356216) >>> minmax(data, axis=1) # doctest: +FLOAT_CMP (array([0.18391881, 0.20456028, 0.6531771 ]), array([0.92961609, 0.5955447 , 0.96451452])) """ if mask is not None: funcs = [np.ma.min, np.ma.max] data = np.ma.masked_array(data, mask=mask) else: funcs = [np.min, np.max] return funcs[0](data, axis=axis), funcs[1](data, axis=axis)
[docs] class NDDataStats: """ Class to calculate (sigma-clipped) image statistics on NDData objects. Set the ``sigma_clip`` keyword to perform sigma clipping. Parameters ---------- nddata : `~astropy.nddata.NDData` NDData object containing the data array (and an optional mask) on which to calculate statistics. Masked pixels are excluded when computing the image statistics. sigma_clip : `astropy.stats.SigmaClip` instance, optional A `~astropy.stats.SigmaClip` object that defines the sigma clipping parameters. If `None` then no sigma clipping will be performed (default). lower_bound : float, optional The minimum data value to include in the statistics. All pixel values less than ``lower_bound`` will be ignored. `None` means that no lower bound is applied (default). upper_bound : float, optional The maximum data value to include in the statistics. All pixel values greater than ``upper_bound`` will be ignored. `None` means that no upper bound is applied (default). mask_value : float, optional A data value (e.g., ``0.0``) to be masked. ``mask_value`` will be masked in addition to any input ``mask``. mask_invalid : bool, optional If `True` (the default), then any unmasked invalid values (e.g. NaN, inf) will be masked. Examples -------- >>> import numpy as np >>> from astropy.nddata import NDData >>> from astroimtools import NDDataStats >>> data = np.arange(10) >>> data[0] = 100. >>> nddata = NDData(data) >>> stats = NDDataStats(nddata) >>> stats.mean 14.5 >>> stats.std # doctest: +FLOAT_CMP 28.605069480775605 >>> stats.mad_std # doctest: +FLOAT_CMP 3.706505546264005 >>> from astropy.stats import SigmaClip >>> sigclip = SigmaClip(sigma=2.5) >>> stats = NDDataStats(nddata, sigma_clip=sigclip) >>> stats.mean 5.0 >>> stats.std # doctest: +FLOAT_CMP 2.581988897471611 >>> stats.mad_std # doctest: +FLOAT_CMP 2.965204437011204 """ @deprecated_renamed_argument('sigma', 'sigma_clip', '0.2') def __init__(self, nddata, sigma_clip=None, lower_bound=None, upper_bound=None, mask_value=None, mask_invalid=True): if not isinstance(nddata, NDData): raise TypeError('nddata input must be an astropy.nddata.NDData ' 'instance') # update the mask mask = mask_databounds(nddata.data, mask=nddata.mask, lower_bound=lower_bound, upper_bound=upper_bound, value=mask_value, mask_invalid=mask_invalid) if np.all(mask): raise ValueError('All data values are masked') # remove masked values data = nddata.data[~mask] if sigma_clip is not None: if not isinstance(sigma_clip, SigmaClip): raise TypeError('sigma_clip must be an ' 'astropy.stats.SigmaClip instance') data = sigma_clip(data, masked=False, axis=None) # 1D ndarray self.goodvals = data.ravel() # 1D array self._original_npixels = nddata.data.size def __getitem__(self, key): return getattr(self, key, None) @lazyproperty def npixels(self): """ The number of good (unmasked/unclipped) pixels. """ return len(self.goodvals) @lazyproperty def nrejected(self): """ The number of rejected (masked/clipped) pixels. """ return self._original_npixels - self.npixels @lazyproperty def mean(self): """ The mean of pixel values. """ return np.mean(self.goodvals) @lazyproperty def median(self): """ The median of the pixel values. """ return np.median(self.goodvals) @lazyproperty def mode(self): """ The mode of the pixel values. The mode is estimated simply as ``(3 * median) - (2 * mean)``. """ return 3. * np.median(self.goodvals) - 2. * np.mean(self.goodvals) @lazyproperty def std(self): """ The standard deviation of the pixel values. """ return np.std(self.goodvals) @lazyproperty def min(self): """ The minimum pixel value. """ return np.min(self.goodvals) @lazyproperty def max(self): """ The maximum pixel value. """ return np.max(self.goodvals) @lazyproperty def mad_std(self): r""" A robust standard deviation using the `median absolute deviation (MAD) <https://en.wikipedia.org/wiki/Median_absolute_deviation>`_. The MAD is defined as ``median(abs(a - median(a)))``. The standard deviation estimator is given by: .. math:: \sigma \approx \frac{\textrm{MAD}}{\Phi^{-1}(3/4)} \approx 1.4826 \ \textrm{MAD} where :math:`\Phi^{-1}(P)` is the normal inverse cumulative distribution function evaluated at probability :math:`P = 3/4`. """ return mad_std(self.goodvals) @lazyproperty def biweight_location(self): """ The biweight location of the pixel values. """ return biweight_location(self.goodvals) @lazyproperty def biweight_midvariance(self): """ The biweight midvariance of the pixel values. """ return biweight_midvariance(self.goodvals) @lazyproperty def skew(self): """ The skew of the pixel values. """ from scipy.stats import skew return skew(self.goodvals) @lazyproperty def kurtosis(self): """ The kurtosis of the pixel values. """ from scipy.stats import kurtosis return kurtosis(self.goodvals)
[docs] @deprecated_renamed_argument('sigma', 'sigma_clip', '0.2') def nddata_stats(nddata, sigma_clip=None, columns=None, lower_bound=None, upper_bound=None, mask_value=None, mask_invalid=True): """ Calculate various statistics on the input data. Set the ``sigma_clip`` keyword to perform sigma clipping. Parameters ---------- nddata : `~astropy.nddata.NDData` or list of `~astropy.nddata.NDData` `~astropy.nddata.NDData` object containing the data array and optional mask on which to calculate statistics. Masked pixels are excluded when computing the image statistics. sigma_clip : `astropy.stats.SigmaClip` instance, optional A `~astropy.stats.SigmaClip` object that defines the sigma clipping parameters. If `None` then no sigma clipping will be performed (default). columns : str or list of str, optional The names of columns, in order, to include in the output `~astropy.table.Table`. The column names can include any of the following statistic names: 'biweight_location', 'biweight_midvariance', 'kurtosis', 'mad_std', 'max', 'mean', 'median', 'min', 'mode', 'npixels', 'nrejected', 'skew', or 'std'. The column names can also include a name of a key in the `astropy.nddata.NDData.meta` dictionary. The default is ``['npixels', 'mean', 'std', 'min', 'max']``. lower_bound : float, optional The minimum data value to include in the statistics. All pixel values less than ``lower_bound`` will be ignored. `None` means that no lower bound is applied (default). upper_bound : float, optional The maximum data value to include in the statistics. All pixel values greater than ``upper_bound`` will be ignored. `None` means that no upper bound is applied (default). mask_value : float, optional A data value (e.g., ``0.0``) to be masked. ``mask_value`` will be masked in addition to any input ``mask``. mask_invalid : bool, optional If `True` (the default), then any unmasked invalid values (e.g. NaN, inf) will be masked. Returns ------- table : `~astropy.table.Table` A table containing the calculated image statistics (or `~astropy.nddata.NDData` metadata). Each table row corresponds to a single data array. Examples -------- >>> import numpy as np >>> from astropy.nddata import NDData >>> from astroimtools import nddata_stats >>> data = np.arange(10) >>> data[0] = 100. >>> nddata = NDData(data) >>> columns = ['mean', 'median', 'mode', 'std', 'mad_std', 'min', 'max'] >>> tbl = nddata_stats(nddata, columns=columns) >>> for col in tbl.colnames: ... tbl[col].info.format = '%.8g' # for consistent table output >>> print(tbl) mean median mode std mad_std min max ---- ------ ----- --------- --------- --- --- 14.5 5.5 -12.5 28.605069 3.7065055 1 100 >>> from astropy.stats import SigmaClip >>> sigclip = SigmaClip(sigma=2.5) >>> tbl = nddata_stats(nddata, sigma_clip=sigclip, columns=columns) >>> for col in tbl.colnames: ... tbl[col].info.format = '%.8g' # for consistent table output >>> print(tbl) mean median mode std mad_std min max ---- ------ ---- --------- --------- --- --- 5 5 5 2.5819889 2.9652044 1 9 """ stats = [] if not isinstance(nddata, list): nddata = np.atleast_1d(nddata) for nddata_obj in nddata: stats.append(NDDataStats( nddata_obj, sigma_clip=sigma_clip, lower_bound=lower_bound, upper_bound=upper_bound, mask_value=mask_value, mask_invalid=mask_invalid)) output_columns = None default_columns = ['npixels', 'mean', 'std', 'min', 'max'] allowed_columns = ['biweight_location', 'biweight_midvariance', 'kurtosis', 'mad_std', 'max', 'mean', 'median', 'min', 'mode', 'npixels', 'nrejected', 'skew', 'std'] if columns is None: output_columns = default_columns else: output_columns = np.atleast_1d(columns) output_table = Table() for column in output_columns: if column not in allowed_columns: values = [nddata_obj.meta.get(column, None) for nddata_obj in nddata] else: values = [getattr(stat, column) for stat in stats] output_table[column] = values return output_table