Source code for astroimtools.stats

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Statistics tools.
"""

import numpy as np
from astropy.nddata import NDData, support_nddata
from astropy.stats import (SigmaClip, biweight_location, biweight_midvariance,
                           mad_std)
from astropy.table import Table
from astropy.utils import lazyproperty
from astropy.utils.decorators import deprecated_renamed_argument

from .utils import mask_databounds

__all__ = ['minmax', 'NDDataStats', 'nddata_stats']



[docs]
@support_nddata
def minmax(data, mask=None, axis=None):
    """
    Return the minimum and maximum values of an array (or along an array
    axis).

    Parameters
    ----------
    data : array-like
        The input data.

    mask : array_like (bool), optional
        A boolean mask, with the same shape as ``data``, where a `True`
        value indicates the corresponding element of ``data`` is masked.

    axis : int, optional
        The axis along which to operate.  By default, flattened input is
        used.

    Returns
    -------
    min : scalar or `~numpy.ndarray`
        The minimum value of ``data``.  If ``axis`` is `None`, the
        result is a scalar value.  If ``axis`` is input, the result is
        an array of dimension ``data.ndim - 1``.

    max : scalar or `~numpy.ndarray`
        The maximum value of ``data``.  If ``axis`` is `None`, the
        result is a scalar value.  If ``axis`` is input, the result is
        an array of dimension ``data.ndim - 1``.

    Notes
    -----
    This function is decorated with `~astropy.nddata.support_nddata` and
    thus supports `~astropy.nddata.NDData` objects as input.

    Examples
    --------
    >>> import numpy as np
    >>> from astroimtools import minmax
    >>> np.random.seed(12345)
    >>> data = np.random.random((3, 3))
    >>> minmax(data)  # doctest: +FLOAT_CMP
    (0.18391881167709445, 0.9645145197356216)

    >>> mask = (data < 0.3)
    >>> minmax(data, mask=mask)  # doctest: +FLOAT_CMP
    (0.3163755545817859, 0.9645145197356216)

    >>> minmax(data, axis=1)  # doctest: +FLOAT_CMP
    (array([0.18391881, 0.20456028, 0.6531771 ]),
     array([0.92961609, 0.5955447 , 0.96451452]))
    """
    if mask is not None:
        funcs = [np.ma.min, np.ma.max]
        data = np.ma.masked_array(data, mask=mask)
    else:
        funcs = [np.min, np.max]

    return funcs[0](data, axis=axis), funcs[1](data, axis=axis)




[docs]
class NDDataStats:
    """
    Class to calculate (sigma-clipped) image statistics on NDData
    objects.

    Set the ``sigma_clip`` keyword to perform sigma clipping.

    Parameters
    ----------
    nddata : `~astropy.nddata.NDData`
        NDData object containing the data array (and an optional mask)
        on which to calculate statistics.  Masked pixels are excluded
        when computing the image statistics.

    sigma_clip : `astropy.stats.SigmaClip` instance, optional
        A `~astropy.stats.SigmaClip` object that defines the sigma
        clipping parameters.  If `None` then no sigma clipping will be
        performed (default).

    lower_bound : float, optional
        The minimum data value to include in the statistics.  All pixel
        values less than ``lower_bound`` will be ignored.  `None` means
        that no lower bound is applied (default).

    upper_bound : float, optional
        The maximum data value to include in the statistics.  All pixel
        values greater than ``upper_bound`` will be ignored.  `None`
        means that no upper bound is applied (default).

    mask_value : float, optional
        A data value (e.g., ``0.0``) to be masked.  ``mask_value`` will
        be masked in addition to any input ``mask``.

    mask_invalid : bool, optional
        If `True` (the default), then any unmasked invalid values (e.g.
        NaN, inf) will be masked.

    Examples
    --------
    >>> import numpy as np
    >>> from astropy.nddata import NDData
    >>> from astroimtools import NDDataStats
    >>> data = np.arange(10)
    >>> data[0] = 100.
    >>> nddata = NDData(data)
    >>> stats = NDDataStats(nddata)
    >>> stats.mean
    14.5
    >>> stats.std  # doctest: +FLOAT_CMP
    28.605069480775605
    >>> stats.mad_std  # doctest: +FLOAT_CMP
    3.706505546264005
    >>> from astropy.stats import SigmaClip
    >>> sigclip = SigmaClip(sigma=2.5)
    >>> stats = NDDataStats(nddata, sigma_clip=sigclip)
    >>> stats.mean
    5.0
    >>> stats.std  # doctest: +FLOAT_CMP
    2.581988897471611
    >>> stats.mad_std  # doctest: +FLOAT_CMP
    2.965204437011204
    """

    @deprecated_renamed_argument('sigma', 'sigma_clip', '0.2')
    def __init__(self, nddata, sigma_clip=None, lower_bound=None,
                 upper_bound=None, mask_value=None, mask_invalid=True):

        if not isinstance(nddata, NDData):
            raise TypeError('nddata input must be an astropy.nddata.NDData '
                            'instance')

        # update the mask
        mask = mask_databounds(nddata.data, mask=nddata.mask,
                               lower_bound=lower_bound,
                               upper_bound=upper_bound, value=mask_value,
                               mask_invalid=mask_invalid)

        if np.all(mask):
            raise ValueError('All data values are masked')

        # remove masked values
        data = nddata.data[~mask]

        if sigma_clip is not None:
            if not isinstance(sigma_clip, SigmaClip):
                raise TypeError('sigma_clip must be an '
                                'astropy.stats.SigmaClip instance')

            data = sigma_clip(data, masked=False, axis=None)  # 1D ndarray

        self.goodvals = data.ravel()  # 1D array
        self._original_npixels = nddata.data.size

    def __getitem__(self, key):
        return getattr(self, key, None)

    @lazyproperty
    def npixels(self):
        """
        The number of good (unmasked/unclipped) pixels.
        """
        return len(self.goodvals)

    @lazyproperty
    def nrejected(self):
        """
        The number of rejected (masked/clipped) pixels.
        """
        return self._original_npixels - self.npixels

    @lazyproperty
    def mean(self):
        """
        The mean of pixel values.
        """
        return np.mean(self.goodvals)

    @lazyproperty
    def median(self):
        """
        The median of the pixel values.
        """
        return np.median(self.goodvals)

    @lazyproperty
    def mode(self):
        """
        The mode of the pixel values.

        The mode is estimated simply as ``(3 * median) - (2 * mean)``.
        """
        return 3. * np.median(self.goodvals) - 2. * np.mean(self.goodvals)

    @lazyproperty
    def std(self):
        """
        The standard deviation of the pixel values.
        """
        return np.std(self.goodvals)

    @lazyproperty
    def min(self):
        """
        The minimum pixel value.
        """
        return np.min(self.goodvals)

    @lazyproperty
    def max(self):
        """
        The maximum pixel value.
        """
        return np.max(self.goodvals)

    @lazyproperty
    def mad_std(self):
        r"""
        A robust standard deviation using the `median absolute deviation
        (MAD)
        <https://en.wikipedia.org/wiki/Median_absolute_deviation>`_.
        The MAD is defined as ``median(abs(a - median(a)))``.

        The standard deviation estimator is given by:

        .. math::

            \sigma \approx \frac{\textrm{MAD}}{\Phi^{-1}(3/4)}
            \approx 1.4826 \ \textrm{MAD}

        where :math:`\Phi^{-1}(P)` is the normal inverse cumulative
        distribution function evaluated at probability :math:`P = 3/4`.
        """
        return mad_std(self.goodvals)

    @lazyproperty
    def biweight_location(self):
        """
        The biweight location of the pixel values.
        """
        return biweight_location(self.goodvals)

    @lazyproperty
    def biweight_midvariance(self):
        """
        The biweight midvariance of the pixel values.
        """
        return biweight_midvariance(self.goodvals)

    @lazyproperty
    def skew(self):
        """
        The skew of the pixel values.
        """
        from scipy.stats import skew
        return skew(self.goodvals)

    @lazyproperty
    def kurtosis(self):
        """
        The kurtosis of the pixel values.
        """
        from scipy.stats import kurtosis
        return kurtosis(self.goodvals)




[docs]
@deprecated_renamed_argument('sigma', 'sigma_clip', '0.2')
def nddata_stats(nddata, sigma_clip=None, columns=None, lower_bound=None,
                 upper_bound=None, mask_value=None, mask_invalid=True):
    """
    Calculate various statistics on the input data.

    Set the ``sigma_clip`` keyword to perform sigma clipping.

    Parameters
    ----------
    nddata : `~astropy.nddata.NDData` or list of `~astropy.nddata.NDData`
        `~astropy.nddata.NDData` object containing the data array and
        optional mask on which to calculate statistics.  Masked pixels
        are excluded when computing the image statistics.

    sigma_clip : `astropy.stats.SigmaClip` instance, optional
        A `~astropy.stats.SigmaClip` object that defines the sigma
        clipping parameters.  If `None` then no sigma clipping will be
        performed (default).

    columns : str or list of str, optional
        The names of columns, in order, to include in the output
        `~astropy.table.Table`.  The column names can include any of the
        following statistic names: 'biweight_location',
        'biweight_midvariance', 'kurtosis', 'mad_std', 'max', 'mean',
        'median', 'min', 'mode', 'npixels', 'nrejected', 'skew', or
        'std'.  The column names can also include a name of a key in the
        `astropy.nddata.NDData.meta` dictionary.  The default is
        ``['npixels', 'mean', 'std', 'min', 'max']``.

    lower_bound : float, optional
        The minimum data value to include in the statistics.  All pixel
        values less than ``lower_bound`` will be ignored.  `None` means
        that no lower bound is applied (default).

    upper_bound : float, optional
        The maximum data value to include in the statistics.  All pixel
        values greater than ``upper_bound`` will be ignored.  `None` means
        that no upper bound is applied (default).

    mask_value : float, optional
        A data value (e.g., ``0.0``) to be masked.  ``mask_value`` will
        be masked in addition to any input ``mask``.

    mask_invalid : bool, optional
        If `True` (the default), then any unmasked invalid values (e.g.
        NaN, inf) will be masked.

    Returns
    -------
    table : `~astropy.table.Table`
        A table containing the calculated image statistics (or
        `~astropy.nddata.NDData` metadata).  Each table row corresponds
        to a single data array.

    Examples
    --------
    >>> import numpy as np
    >>> from astropy.nddata import NDData
    >>> from astroimtools import nddata_stats
    >>> data = np.arange(10)
    >>> data[0] = 100.
    >>> nddata = NDData(data)
    >>> columns = ['mean', 'median', 'mode', 'std', 'mad_std', 'min', 'max']
    >>> tbl = nddata_stats(nddata, columns=columns)
    >>> for col in tbl.colnames:
    ...     tbl[col].info.format = '%.8g'  # for consistent table output
    >>> print(tbl)
    mean median  mode    std     mad_std  min max
    ---- ------ ----- --------- --------- --- ---
    14.5    5.5 -12.5 28.605069 3.7065055   1 100
    >>> from astropy.stats import SigmaClip
    >>> sigclip = SigmaClip(sigma=2.5)
    >>> tbl = nddata_stats(nddata, sigma_clip=sigclip, columns=columns)
    >>> for col in tbl.colnames:
    ...     tbl[col].info.format = '%.8g'  # for consistent table output
    >>> print(tbl)
    mean median mode    std     mad_std  min max
    ---- ------ ---- --------- --------- --- ---
       5      5    5 2.5819889 2.9652044   1   9
    """
    stats = []
    if not isinstance(nddata, list):
        nddata = np.atleast_1d(nddata)

    for nddata_obj in nddata:
        stats.append(NDDataStats(
            nddata_obj, sigma_clip=sigma_clip, lower_bound=lower_bound,
            upper_bound=upper_bound, mask_value=mask_value,
            mask_invalid=mask_invalid))

    output_columns = None
    default_columns = ['npixels', 'mean', 'std', 'min', 'max']
    allowed_columns = ['biweight_location', 'biweight_midvariance',
                       'kurtosis', 'mad_std', 'max', 'mean', 'median', 'min',
                       'mode', 'npixels', 'nrejected', 'skew', 'std']

    if columns is None:
        output_columns = default_columns
    else:
        output_columns = np.atleast_1d(columns)

    output_table = Table()
    for column in output_columns:
        if column not in allowed_columns:
            values = [nddata_obj.meta.get(column, None) for nddata_obj
                      in nddata]
        else:
            values = [getattr(stat, column) for stat in stats]
        output_table[column] = values

    return output_table
Navigation

Source code for astroimtools.stats