""" Module containing the `~halotools.mock_observables.counts_in_cylinders` function
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import multiprocessing
from functools import partial
from .engines import counts_in_cylinders_engine
from ..mock_observables_helpers import get_num_threads, get_period
from ..pair_counters.rectangular_mesh import RectangularDoubleMesh
from ..pair_counters.mesh_helpers import (
_set_approximate_cell_sizes,
_cell1_parallelization_indices,
_enclose_in_box,
_enforce_maximum_search_length,
)
from ...utils.array_utils import custom_len
__author__ = ("Andrew Hearin",)
__all__ = ("counts_in_cylinders",)
[docs]
def counts_in_cylinders(
sample1,
sample2,
proj_search_radius,
cylinder_half_length,
period=None,
verbose=False,
num_threads=1,
approx_cell1_size=None,
approx_cell2_size=None,
return_indexes=False,
condition=None,
condition_args=(),
):
"""
Function counts the number of points in ``sample2`` separated by a xy-distance
*r* and z-distance *z* from each point in ``sample1``,
where *r* and *z* are defined by the input ``proj_search_radius``
and ``cylinder_half_length``, respectively.
Parameters
----------
sample1 : array_like
Npts1 x 3 numpy array containing 3-D positions of points.
See the :ref:`mock_obs_pos_formatting` documentation page, or the
Examples section below, for instructions on how to transform
your coordinate position arrays into the
format accepted by the ``sample1`` and ``sample2`` arguments.
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.
sample2 : array_like
Npts2 x 3 array containing 3-D positions of points. If this is None, an
autocorrelation on sample1 will be done instead.
proj_search_radius : array_like
Length-Npts1 array defining the xy-distance around each point in ``sample1``
to search for points in ``sample2``.
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.
cylinder_half_length : array_like
Length-Npts1 array defining the z-distance around each point in ``sample1``
to search for points in ``sample2``. Thus the *total* length of the cylinder
placed around each point in ``sample1`` will be *twice* the corresponding
value stored in the input ``cylinder_half_length``.
Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.
period : array_like, optional
Length-3 array defining the periodic boundary conditions.
If only one number is specified, the enclosing volume is assumed to
be a periodic cube (by far the most common case).
If period is set to None, the default option,
PBCs are set to infinity.
verbose : Boolean, optional
If True, print out information and progress.
num_threads : int, optional
Number of threads to use in calculation, where parallelization is performed
using the python ``multiprocessing`` module. Default is 1 for a purely serial
calculation, in which case a multiprocessing Pool object will
never be instantiated. A string 'max' may be used to indicate that
the pair counters should use all available cores on the machine.
approx_cell1_size : array_like, optional
Length-3 array serving as a guess for the optimal manner by how points
will be apportioned into subvolumes of the simulation box.
The optimum choice unavoidably depends on the specs of your machine.
Default choice is to use Lbox/10 in each dimension,
which will return reasonable result performance for most use-cases.
Performance can vary sensitively with this parameter, so it is highly
recommended that you experiment with this parameter when carrying out
performance-critical calculations.
approx_cell2_size : array_like, optional
Analogous to ``approx_cell1_size``, but for sample2. See comments for
``approx_cell1_size`` for details.
return_indexes: bool, optional
If true, return both counts and the indexes of the pairs.
condition : str, optional
Require a condition to be met for a pair to be counted.
See options below:
None | "always_true": Count all pairs in cylinder
"mass_frac":
Only count pairs which satisfy lim[0] < mass2/mass1 < lim[1]
condition_args : tuple, optional
Arguments passed to the condition constructor
None | "always_true": condition_args will be ignored
"mass_frac":
-mass1 (array of mass of sample 1; required)
-mass2 (array of mass of sample 2; required)
-lim (tuple of min,max; required)
-lower_equality (bool to use lim[0] <= m2/m1; optional)
-upper_equality (bool to use m2/m1 <= lim[1]; optional)
Returns
-------
num_pairs : array_like
Numpy array of length Npts1 storing the numbers of points in ``sample2``
inside the cylinder surrounding each point in ``sample1``.
indexes : array_like, optional
If ``return_indexes`` is true, return a structured array of length
num_pairs with the indexes of the pairs. Column ``i1`` is the index in
``sample1`` at the center of the cylinder and column ``i2`` is the index
in ``sample2`` that is contained in the cylinder.
Examples
--------
For illustration purposes, we'll create some fake data and call the pair counter.
>>> from halotools.sim_manager import FakeSim
>>> halocat = FakeSim()
In this first example, we'll demonstrate how to calculate the number of
low-mass host halos are in cylinders of *fixed length* surrounding high-mass halos.
>>> host_halo_mask = halocat.halo_table['halo_upid'] == -1
>>> host_halos = halocat.halo_table[host_halo_mask]
>>> high_mass_mask = host_halos['halo_mvir'] >= 5e13
>>> high_mass_hosts = host_halos[high_mass_mask]
>>> low_mass_mask = host_halos['halo_mvir'] <= 1e12
>>> low_mass_hosts = host_halos[low_mass_mask]
>>> x1, y1, z1 = high_mass_hosts['halo_x'], high_mass_hosts['halo_y'], high_mass_hosts['halo_z']
>>> x2, y2, z2 = low_mass_hosts['halo_x'], low_mass_hosts['halo_y'], low_mass_hosts['halo_z']
We transform our *x, y, z* points into the array shape used by the pair-counter by
taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
is used throughout the `~halotools.mock_observables` sub-package:
>>> sample1 = np.vstack([x1, y1, z1]).T
>>> sample2 = np.vstack([x2, y2, z2]).T
Now let's drop a cylinder of radius 200 kpc/h and half-length 250 kpc/h around
each high-mass host halo, and for each high-mass host we'll count the number of
low-mass halos falling within that cylinder:
>>> period = halocat.Lbox
>>> proj_search_radius, cylinder_half_length = 0.2, 0.25
>>> result = counts_in_cylinders(sample1, sample2, proj_search_radius, cylinder_half_length, period=period)
For example usage of the `~halotools.mock_observables.counts_in_cylinders` function
on a realistic galaxy catalog that makes use of the variable search length feature,
see the :ref:`calculating_counts_in_cells` tutorial.
"""
# Process the inputs with the helper function
result = _counts_in_cylinders_process_args(
sample1,
sample2,
proj_search_radius,
cylinder_half_length,
period,
verbose,
num_threads,
approx_cell1_size,
approx_cell2_size,
return_indexes,
)
(
x1in,
y1in,
z1in,
x2in,
y2in,
z2in,
proj_search_radius,
cylinder_half_length,
) = result[0:8]
period, num_threads, PBCs, approx_cell1_size, approx_cell2_size, autocorr = result[
8:
]
xperiod, yperiod, zperiod = period
rp_max = np.max(proj_search_radius)
pi_max = np.max(cylinder_half_length)
search_xlength, search_ylength, search_zlength = rp_max, rp_max, pi_max
# Compute the estimates for the cell sizes
approx_cell1_size, approx_cell2_size = _set_approximate_cell_sizes(
approx_cell1_size, approx_cell2_size, period
)
approx_x1cell_size, approx_y1cell_size, approx_z1cell_size = approx_cell1_size
approx_x2cell_size, approx_y2cell_size, approx_z2cell_size = approx_cell2_size
# Build the rectangular mesh
double_mesh = RectangularDoubleMesh(
x1in,
y1in,
z1in,
x2in,
y2in,
z2in,
approx_x1cell_size,
approx_y1cell_size,
approx_z1cell_size,
approx_x2cell_size,
approx_y2cell_size,
approx_z2cell_size,
search_xlength,
search_ylength,
search_zlength,
xperiod,
yperiod,
zperiod,
PBCs,
)
# Create a function object that has a single argument, for parallelization purposes
engine = partial(
counts_in_cylinders_engine,
double_mesh,
x1in,
y1in,
z1in,
x2in,
y2in,
z2in,
proj_search_radius,
cylinder_half_length,
return_indexes,
condition,
condition_args,
)
# Calculate the cell1 indices that will be looped over by the engine
num_threads, cell1_tuples = _cell1_parallelization_indices(
double_mesh.mesh1.ncells, num_threads
)
if num_threads > 1:
pool = multiprocessing.Pool(num_threads)
result = pool.map(engine, cell1_tuples)
pool.close()
if return_indexes:
counts = np.sum([res[0] for res in result], axis=0)
indexes = np.concatenate([res[1] for res in result])
else:
counts = np.sum(result, axis=0)
else:
result = engine(cell1_tuples[0])
if return_indexes:
counts = result[0]
indexes = result[1]
else:
counts = result
# All pairs will match with themselves. We don't want this if we are doing an autocorr
if autocorr:
counts -= 1
if return_indexes:
indexes = indexes[indexes["i1"] != indexes["i2"]]
if return_indexes:
return counts, indexes
return counts
def _counts_in_cylinders_process_args(
sample1,
sample2,
proj_search_radius,
cylinder_half_length,
period,
verbose,
num_threads,
approx_cell1_size,
approx_cell2_size,
return_indexes,
):
""""""
num_threads = get_num_threads(num_threads)
autocorr = False
if sample2 is None:
sample2, autocorr = sample1, True
# The engine expects position arrays to be double-precision
sample1 = np.asarray(sample1, dtype=np.float64)
sample2 = np.asarray(sample2, dtype=np.float64)
# Passively enforce that we are working with ndarrays
x1 = sample1[:, 0]
y1 = sample1[:, 1]
z1 = sample1[:, 2]
x2 = sample2[:, 0]
y2 = sample2[:, 1]
z2 = sample2[:, 2]
if return_indexes and ((len(x1) > 2**32) or (len(x2) > 2**32)):
msg = (
"Return indexes uses a uint32 and so can only handle inputs shorter than "
+ "2^32 (~4 Billion). If you are affected by this please raise an Issue on "
+ "https://github.com/astropy/halotools.\n"
)
raise ValueError(msg)
proj_search_radius = np.atleast_1d(proj_search_radius).astype("f8")
if len(proj_search_radius) == 1:
proj_search_radius = np.zeros_like(x1) + proj_search_radius[0]
elif len(proj_search_radius) == len(x1):
pass
else:
msg = "Input ``proj_search_radius`` must be a scalar or length-Npts1 array"
raise ValueError(msg)
max_rp_max = np.max(proj_search_radius)
cylinder_half_length = np.atleast_1d(cylinder_half_length).astype("f8")
if len(cylinder_half_length) == 1:
cylinder_half_length = np.zeros_like(x1) + cylinder_half_length[0]
elif len(cylinder_half_length) == len(x1):
pass
else:
msg = "Input ``cylinder_half_length`` must be a scalar or length-Npts1 array"
raise ValueError(msg)
max_pi_max = np.max(cylinder_half_length)
period, PBCs = get_period(period)
# At this point, period may still be set to None,
# in which case we must remap our points inside the smallest enclosing cube
# and set ``period`` equal to this cube size.
if period is None:
x1, y1, z1, x2, y2, z2, period = _enclose_in_box(
sample1[:, 0],
sample1[:, 2],
sample1[:, 2],
sample2[:, 0],
sample2[:, 2],
sample2[:, 2],
min_size=[max_rp_max * 3.0, max_rp_max * 3.0, max_pi_max * 3.0],
)
x1 = sample1[:, 0]
y1 = sample1[:, 1]
z1 = sample1[:, 2]
x2 = sample2[:, 0]
y2 = sample2[:, 1]
z2 = sample2[:, 2]
else:
x1 = sample1[:, 0]
y1 = sample1[:, 1]
z1 = sample1[:, 2]
x2 = sample2[:, 0]
y2 = sample2[:, 1]
z2 = sample2[:, 2]
_enforce_maximum_search_length(max_rp_max, period[0])
_enforce_maximum_search_length(max_rp_max, period[1])
_enforce_maximum_search_length(max_pi_max, period[2])
if approx_cell1_size is None:
approx_cell1_size = [max_rp_max, max_rp_max, max_pi_max]
elif custom_len(approx_cell1_size) == 1:
approx_cell1_size = [approx_cell1_size, approx_cell1_size, approx_cell1_size]
if approx_cell2_size is None:
approx_cell2_size = [max_rp_max, max_rp_max, max_pi_max]
elif custom_len(approx_cell2_size) == 1:
approx_cell2_size = [approx_cell2_size, approx_cell2_size, approx_cell2_size]
return (
x1,
y1,
z1,
x2,
y2,
z2,
proj_search_radius,
cylinder_half_length,
period,
num_threads,
PBCs,
approx_cell1_size,
approx_cell2_size,
autocorr,
)