Source code for halotools.mock_observables.void_statistics.underdensity_prob_func

"""
Module containing the `~halotools.mock_observables.void_prob_func`
and `~halotools.mock_observables.underdensity_prob_func` used to calculate void statistics.
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

from astropy.utils.misc import NumpyRNGContext

from ..pair_counters import npairs_per_object_3d

from ...utils.array_utils import array_is_monotonic
from ...custom_exceptions import HalotoolsError


__all__ = ("underdensity_prob_func",)
__author__ = ["Duncan Campbell", "Andrew Hearin"]


np.seterr(divide="ignore", invalid="ignore")  # ignore divide by zero in e.g. DD/RR



[docs]
def underdensity_prob_func(
    sample1,
    rbins,
    n_ran=None,
    random_sphere_centers=None,
    period=None,
    sample_volume=None,
    u=0.2,
    num_threads=1,
    approx_cell1_size=None,
    approx_cellran_size=None,
    seed=None,
):
    """
    Calculate the underdensity probability function (UPF), :math:`P_U(r)`.

    :math:`P_U(r)` is defined as the probability that a randomly placed sphere of size
    :math:`r` encompases a volume with less than a specified number density.

    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    See also :ref:`galaxy_catalog_analysis_tutorial8`.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 3 numpy array containing 3-D positions of points.
        See the :ref:`mock_obs_pos_formatting` documentation page, or the
        Examples section below, for instructions on how to transform
        your coordinate position arrays into the
        format accepted by the ``sample1`` and ``sample2`` arguments.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    rbins : float
        size of spheres to search for neighbors
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    n_ran : int, optional
        integer number of randoms to use to search for voids.
        If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

    random_sphere_centers : array_like, optional
        Npts x 3 array of randomly selected positions to drop down spheres
        to use to measure the `void_prob_func`. If ``random_sphere_centers``
        is not passed, ``n_ran`` must be passed.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar, Lbox,
        period is assumed to be the same in all Cartesian directions.
        If set to None, PBCs are set to infinity, in which case ``sample_volume``
        must be specified so that the global mean density can be estimated.
        In this case, it is still necessary
        to drop down randomly placed spheres in order to compute the UPF. To do so,
        the spheres will be dropped inside a cubical box whose sides are defined by
        the smallest/largest coordinate distance of the input ``sample1``.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    sample_volume : float, optional
        If period is set to None, you must specify the effective volume of the sample.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    u : float, optional
        density threshold in units of the mean object density

    num_threads : int, optional
        number of 'threads' to use in the pair counting.  if set to 'max', use all
        available cores.  num_threads=0 is the default.

    approx_cell1_size : array_like, optional
        Length-3 array serving as a guess for the optimal manner by how points
        will be apportioned into subvolumes of the simulation box.
        The optimum choice unavoidably depends on the specs of your machine.
        Default choice is to use *max(rbins)* in each dimension,
        which will return reasonable result performance for most use-cases.
        Performance can vary sensitively with this parameter, so it is highly
        recommended that you experiment with this parameter when carrying out
        performance-critical calculations.

    approx_cellran_size : array_like, optional
        Analogous to ``approx_cell1_size``, but for used for randoms.  See comments for
        ``approx_cell1_size`` for details.

    seed : int, optional
        Random number seed used to randomly lay down spheres, if applicable.
        Default is None, in which case results will be stochastic.

    Returns
    -------
    upf : numpy.array
        *len(rbins)* length array containing the underdensity probability function
        :math:`P_U(r)` computed for each :math:`r` defined by input ``rbins``.

    Notes
    -----
    This function requires the calculation of the number of pairs per randomly placed
    sphere, and thus storage of an array of shape(n_ran,len(rbins)).  This can be a
    memory intensive process as this array becomes large.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 10000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> coords = np.vstack((x,y,z)).T

    >>> rbins = np.logspace(-2,-1,20)
    >>> n_ran = 1000
    >>> upf = underdensity_prob_func(coords, rbins, n_ran=n_ran, period=period, u=0.2)

    See also
    ----------
    :ref:`galaxy_catalog_analysis_tutorial8`
    """
    (
        sample1,
        rbins,
        n_ran,
        random_sphere_centers,
        period,
        sample_volume,
        u,
        num_threads,
        approx_cell1_size,
        approx_cellran_size,
    ) = _underdensity_prob_func_process_args(
        sample1,
        rbins,
        n_ran,
        random_sphere_centers,
        period,
        sample_volume,
        u,
        num_threads,
        approx_cell1_size,
        approx_cellran_size,
        seed,
    )

    result = npairs_per_object_3d(
        random_sphere_centers,
        sample1,
        rbins,
        period=period,
        num_threads=num_threads,
        approx_cell1_size=approx_cell1_size,
        approx_cell2_size=approx_cellran_size,
    )

    # calculate the number of galaxies as a
    # function of r that corresponds to the
    # specified under-density
    mean_rho = len(sample1) / sample_volume
    vol = (4.0 / 3.0) * np.pi * rbins**3
    N_max = mean_rho * vol * u

    num_underdense_spheres = np.array(
        [sum(result[:, i] <= N_max[i]) for i in range(len(N_max))]
    )
    return num_underdense_spheres / n_ran



def _underdensity_prob_func_process_args(
    sample1,
    rbins,
    n_ran,
    random_sphere_centers,
    period,
    sample_volume,
    u,
    num_threads,
    approx_cell1_size,
    approx_cellran_size,
    seed,
):
    """ """
    sample1 = np.atleast_1d(sample1)

    rbins = np.atleast_1d(rbins)
    try:
        assert rbins.ndim == 1
        assert len(rbins) > 1
        assert np.min(rbins) > 0
        if len(rbins) > 2:
            assert array_is_monotonic(rbins, strict=True) == 1
    except AssertionError:
        msg = (
            "\n Input ``rbins`` must be a monotonically increasing \n"
            "1-D array with at least two entries. All entries must be strictly positive."
        )
        raise HalotoolsError(msg)

    if period is None:
        xmin, xmax = np.min(sample1), np.max(sample1)
        ymin, ymax = np.min(sample1), np.max(sample1)
        zmin, zmax = np.min(sample1), np.max(sample1)
        if sample_volume is None:
            msg = "If period is None, you must pass in ``sample_volume``."
            raise HalotoolsError(msg)
        else:
            sample_volume = float(sample_volume)
    else:
        period = np.atleast_1d(period)
        if len(period) == 1:
            period = np.array([period, period, period])
        elif len(period) == 3:
            pass
        else:
            msg = "\nInput ``period`` must either be a float or length-3 sequence"
            raise HalotoolsError(msg)
        xmin, xmax = 0.0, float(period[0])
        ymin, ymax = 0.0, float(period[1])
        zmin, zmax = 0.0, float(period[2])
        if sample_volume is None:
            sample_volume = period.prod()
        else:
            msg = "If period is not None, do not pass in sample_volume"
            raise HalotoolsError(msg)

    if n_ran is None:
        if random_sphere_centers is None:
            msg = "You must pass either ``n_ran`` or ``random_sphere_centers``"
            raise HalotoolsError(msg)
        else:
            random_sphere_centers = np.atleast_1d(random_sphere_centers)
            try:
                assert random_sphere_centers.shape[1] == 3
            except AssertionError:
                msg = (
                    "Your input ``random_sphere_centers`` must have shape (Nspheres, 3)"
                )
                raise HalotoolsError(msg)
        n_ran = float(random_sphere_centers.shape[0])
    else:
        if random_sphere_centers is not None:
            msg = "If passing in ``random_sphere_centers``, do not also pass in ``n_ran``."
            raise HalotoolsError(msg)
        else:
            with NumpyRNGContext(seed):
                xran = np.random.uniform(xmin, xmax, n_ran)
                yran = np.random.uniform(ymin, ymax, n_ran)
                zran = np.random.uniform(zmin, zmax, n_ran)
            random_sphere_centers = np.vstack([xran, yran, zran]).T

    u = float(u)

    return (
        sample1,
        rbins,
        n_ran,
        random_sphere_centers,
        period,
        sample_volume,
        u,
        num_threads,
        approx_cell1_size,
        approx_cellran_size,
    )
Navigation

Source code for halotools.mock_observables.void_statistics.underdensity_prob_func