Source code for halotools.utils.conditional_percentile

"""
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
from astropy.utils.misc import NumpyRNGContext

from .array_utils import unsorting_indices
from .engines import cython_conditional_rank_kernel

__all__ = ('sliding_conditional_percentile', )



[docs]
def sliding_conditional_percentile(x, y, window_length, assume_x_is_sorted=False,
            add_subgrid_noise=True, seed=None):
    r""" Estimate the cumulative distribution function Prob(< y | x).

    Parameters
    ----------
    x : ndarray
        Array of shape (npts, )

    y : ndarray
        Array of shape (npts, )

    window_length : int
        Integer must be odd and less than ``npts``

    assume_x_is_sorted : bool, optional
        Performance enhancement flag that can be used for cases where input `x`
        has already been sorted. Default is False.

    add_subgrid_noise : bool, optional
        Flag determines whether random uniform noise will be added to fill in
        the gaps at the sub-grid level determined by `window_length`. Default is True.

    seed : int, optional
        Random number seed used together with the `add_subgrid_noise` argument
        to minimize discreteness effects due to the finite window size over which
        Prob(< y | x) is estimated. Default is None, for stochastic results.

    Returns
    -------
    rank_order_percentiles : ndarray
        Numpy array of shape (npts, ) storing values in the open interval (0, 1).
        Larger values of the returned array correspond to values of ``y``
        that are larger-than-average for the corresponding value of ``x``.

    Notes
    -----
    The ``window_length`` argument controls the precision of the calculation,
    and also the performance. For estimations of Prob(< y | x) with sub-percent accuracy,
    values of ``window_length`` must exceed 100.

    See :ref:`cam_tutorial` demonstrating how to use this
    function in galaxy-halo modeling with several worked examples.

    Examples
    --------
    >>> x = np.random.rand(100)
    >>> y = np.random.rand(100)
    >>> window_length = 5
    >>> result = sliding_conditional_percentile(x, y, window_length)
    """
    rank_orders = cython_sliding_rank(x, y, window_length,
                assume_x_is_sorted=assume_x_is_sorted)
    rank_order_percentiles = (1. + rank_orders)/float(window_length+1)

    if add_subgrid_noise:
        dp = 1./float(window_length+1)
        low = rank_order_percentiles - dp
        high = rank_order_percentiles + dp
        npts = len(rank_order_percentiles)
        with NumpyRNGContext(seed):
            rank_order_percentiles = np.random.uniform(low, high, npts)

    return rank_order_percentiles



def rank_order_function(x):
    r""" Calculate the rank-order of each element in an input array.

    Parameters
    ----------
    x : ndarray
        Array of shape (npts, )

    Results
    -------
    rank_orders : ndarray
        Integer array of shape (npts, ) storing values in the interval [0, npts-1]
    """
    x = np.atleast_1d(x)
    assert x.ndim == 1, "x must be a 1-d sequence"
    assert len(x) > 1, "x must have more than one element"

    return unsorting_indices(np.argsort(x))


def cython_sliding_rank(x, y, window_length, assume_x_is_sorted=False):
    r"""
    Return an array storing the rank-order of each element element in y
    computed over a fixed window length at each x

    This function is the kernel of calculation of Prob(< y | x).

    Parameters
    ----------
    x : ndarray
        Array of shape (npts, )

    y : ndarray
        Array of shape (npts, )

    window_length : int
        Integer must be odd and less than ``npts``

    assume_x_is_sorted : bool, optional
        Performance enhancement flag that can be used for cases where input `x`
        has already been sorted. Default is False.

    Returns
    -------
    sliding_rank_orders : ndarray
        Integer array of shape (npts, ) storing values between 0 and window_length-1

    Examples
    --------
    >>> x = np.random.rand(100)
    >>> y = np.random.rand(100)
    >>> window_length = 5
    >>> result = cython_sliding_rank(x, y, window_length)
    """
    x, y, nwin = _check_xyn_bounds(x, y, window_length)
    nhalfwin = int(nwin/2)

    if assume_x_is_sorted:
        y_sorted = y
    else:
        indx_x_sorted = np.argsort(x)
        indx_x_unsorted = unsorting_indices(indx_x_sorted)
        y_sorted = y[indx_x_sorted]

    result = np.array(cython_conditional_rank_kernel(y_sorted, nwin))

    leftmost_window_ranks = rank_order_function(y_sorted[:nwin])
    result[:nhalfwin+1] = leftmost_window_ranks[:nhalfwin+1]

    rightmost_window_ranks = rank_order_function(y_sorted[-nwin:])
    result[-nhalfwin-1:] = rightmost_window_ranks[-nhalfwin-1:]

    if assume_x_is_sorted:
        return result.astype(int)
    else:
        return result[indx_x_unsorted].astype(int)


def _check_xyn_bounds(x, y, n):
    r""" Enforce bounds checks on the inputs
    and return 1-d Numpy arrays with appropriate dtype
    """
    x = np.atleast_1d(x).astype('f8')
    assert x.ndim == 1, "x must be a 1-d array"
    y = np.atleast_1d(y).astype('f8')
    assert y.ndim == 1, "y must be a 1-d array"

    assert len(x) == len(y), "x and y must have the same length"

    msg = "Window length = {0} must be an odd integer"
    try:
        assert n % 2 == 1, msg.format(n)
    except AssertionError:
        raise ValueError(msg.format(n))

    msg2 = "Window length = {0} must satisfy 1 < n < len(x)"
    assert 1 < n < len(x), msg2.format(n)

    return x, y, int(n)
Navigation

Source code for halotools.utils.conditional_percentile