# Source code for halotools.mock_observables.void_statistics.void_prob_func

```"""
Module containing the `~halotools.mock_observables.void_prob_func`
and `~halotools.mock_observables.underdensity_prob_func` used to calculate void statistics.
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

from astropy.utils.misc import NumpyRNGContext

from ..pair_counters import npairs_per_object_3d

from ...utils.array_utils import array_is_monotonic
from ...custom_exceptions import HalotoolsError

__all__ = ("void_prob_func",)
__author__ = ["Duncan Campbell", "Andrew Hearin"]

np.seterr(divide="ignore", invalid="ignore")  # ignore divide by zero in e.g. DD/RR

[docs]
def void_prob_func(
sample1,
rbins,
n_ran=None,
random_sphere_centers=None,
period=None,
approx_cell1_size=None,
approx_cellran_size=None,
seed=None,
):
"""
Calculate the void probability function (VPF), :math:`P_0(r)`,
defined as the probability that a random
sphere of radius *r* contains zero points in the input sample.

See the :ref:`mock_obs_pos_formatting` documentation page for
instructions on how to transform your coordinate position arrays into the
format accepted by the ``sample1`` argument.

Parameters
----------
sample1 : array_like
Npts1 x 3 numpy array containing 3-D positions of points.
See the :ref:`mock_obs_pos_formatting` documentation page, or the
Examples section below, for instructions on how to transform
your coordinate position arrays into the
format accepted by the ``sample1`` and ``sample2`` arguments.
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

rbins : float
size of spheres to search for neighbors
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

n_ran : int, optional
integer number of randoms to use to search for voids.
If ``n_ran`` is not passed, you must pass ``random_sphere_centers``.

random_sphere_centers : array_like, optional
Npts x 3 array of randomly selected positions to drop down spheres
to use to measure the `void_prob_func`. If ``random_sphere_centers``
is not passed, ``n_ran`` must be passed.

period : array_like, optional
Length-3 sequence defining the periodic boundary conditions
in each dimension. If you instead provide a single scalar, Lbox,
period is assumed to be the same in all Cartesian directions.
If set to None, PBCs are set to infinity. In this case, it is still necessary
to drop down randomly placed spheres in order to compute the VPF. To do so,
the spheres will be dropped inside a cubical box whose sides are defined by
the smallest/largest coordinate distance of the input ``sample1``.
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

Number of threads to use in calculation, where parallelization is performed
using the python ``multiprocessing`` module. Default is 1 for a purely serial
calculation, in which case a multiprocessing Pool object will
never be instantiated. A string 'max' may be used to indicate that
the pair counters should use all available cores on the machine.

approx_cell1_size : array_like, optional
Length-3 array serving as a guess for the optimal manner by how points
will be apportioned into subvolumes of the simulation box.
The optimum choice unavoidably depends on the specs of your machine.
Default choice is to use Lbox/10 in each dimension,
which will return reasonable result performance for most use-cases.
Performance can vary sensitively with this parameter, so it is highly
recommended that you experiment with this parameter when carrying out
performance-critical calculations.

approx_cellran_size : array_like, optional
Analogous to ``approx_cell1_size``, but for randoms.  See comments for
``approx_cell1_size`` for details.

seed : int, optional
Random number seed used to randomly lay down spheres, if applicable.
Default is None, in which case results will be stochastic.

Returns
-------
vpf : numpy.array
*len(rbins)* length array containing the void probability function
:math:`P_0(r)` computed for each :math:`r` defined by input ``rbins``.

Notes
-----
This function requires the calculation of the number of pairs per randomly placed
sphere, and thus storage of an array of shape(n_ran,len(rbins)).  This can be a
memory intensive process as this array becomes large.

Examples
--------
For demonstration purposes we create a randomly distributed set of points within a
periodic unit cube.

>>> Npts = 10000
>>> Lbox = 1.0
>>> period = np.array([Lbox,Lbox,Lbox])

>>> x = np.random.random(Npts)
>>> y = np.random.random(Npts)
>>> z = np.random.random(Npts)

We transform our *x, y, z* points into the array shape used by the pair-counter by
taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
is used throughout the `~halotools.mock_observables` sub-package:

>>> coords = np.vstack((x,y,z)).T

>>> rbins = np.logspace(-2,-1,20)
>>> n_ran = 1000
>>> vpf = void_prob_func(coords, rbins, n_ran=n_ran, period=period)

----------
:ref:`galaxy_catalog_analysis_tutorial8`

"""
(
sample1,
rbins,
n_ran,
random_sphere_centers,
period,
approx_cell1_size,
approx_cellran_size,
) = _void_prob_func_process_args(
sample1,
rbins,
n_ran,
random_sphere_centers,
period,
approx_cell1_size,
approx_cellran_size,
seed,
)

result = npairs_per_object_3d(
random_sphere_centers,
sample1,
rbins,
period=period,
approx_cell1_size=approx_cell1_size,
approx_cell2_size=approx_cellran_size,
)

num_empty_spheres = np.array(
[sum(result[:, i] == 0) for i in range(result.shape[1])]
)
return num_empty_spheres / n_ran

def _void_prob_func_process_args(
sample1,
rbins,
n_ran,
random_sphere_centers,
period,
approx_cell1_size,
approx_cellran_size,
seed,
):
""" """
sample1 = np.atleast_1d(sample1)

rbins = np.atleast_1d(rbins)
try:
assert rbins.ndim == 1
assert len(rbins) > 1
assert np.min(rbins) > 0
if len(rbins) > 2:
assert array_is_monotonic(rbins, strict=True) == 1
except AssertionError:
msg = (
"\n Input ``rbins`` must be a monotonically increasing \n"
"1-D array with at least two entries. All entries must be strictly positive."
)
raise HalotoolsError(msg)

if period is None:
xmin, xmax = np.min(sample1), np.max(sample1)
ymin, ymax = np.min(sample1), np.max(sample1)
zmin, zmax = np.min(sample1), np.max(sample1)
else:
period = np.atleast_1d(period)
if len(period) == 1:
period = np.array([period, period, period])
elif len(period) == 3:
pass
else:
msg = "\nInput ``period`` must either be a float or length-3 sequence"
raise HalotoolsError(msg)
xmin, xmax = 0.0, float(period[0])
ymin, ymax = 0.0, float(period[1])
zmin, zmax = 0.0, float(period[2])

if n_ran is None:
if random_sphere_centers is None:
msg = "You must pass either ``n_ran`` or ``random_sphere_centers``"
raise HalotoolsError(msg)
else:
random_sphere_centers = np.atleast_1d(random_sphere_centers)
try:
assert random_sphere_centers.shape[1] == 3
except AssertionError:
msg = (
"Your input ``random_sphere_centers`` must have shape (Nspheres, 3)"
)
raise HalotoolsError(msg)
n_ran = float(random_sphere_centers.shape[0])
else:
if random_sphere_centers is not None:
msg = "If passing in ``random_sphere_centers``, do not also pass in ``n_ran``."
raise HalotoolsError(msg)
else:
with NumpyRNGContext(seed):
xran = np.random.uniform(xmin, xmax, n_ran)
yran = np.random.uniform(ymin, ymax, n_ran)
zran = np.random.uniform(zmin, zmax, n_ran)
random_sphere_centers = np.vstack([xran, yran, zran]).T

return (
sample1,
rbins,
n_ran,
random_sphere_centers,
period,