""" Module containing the `~halotools.mock_observables.pairwise_distance_3d` function
used to find pairs and their separation distance.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import multiprocessing
from functools import partial
from scipy.sparse import coo_matrix
from .pairwise_distance_3d import _get_r_max
from .rectangular_mesh import RectangularDoubleMesh
from .mesh_helpers import _set_approximate_cell_sizes, _enclose_in_box, _cell1_parallelization_indices
from .cpairs import pairwise_distance_xy_z_engine
from ...utils.array_utils import custom_len
__all__ = ('pairwise_distance_xy_z', )
__author__ = ('Andrew Hearin', 'Duncan Campbell')
[docs]
def pairwise_distance_xy_z(data1, data2, rp_max, pi_max, period=None,
verbose=False, num_threads=1,
approx_cell1_size=None, approx_cell2_size=None):
"""
Function returns pairs of points separated by
a xy-projected distance smaller than or equal to the input ``rp_max`` and z distance ``pi_max``.
Note that if data1 == data2 that the
`~halotools.mock_observables.pairwise_distance_xy_z` function double-counts pairs.
Parameters
----------
data1 : array_like
N1 by 3 numpy array of 3-dimensional positions.
Values of each dimension should be between zero and the corresponding dimension
of the input period.
data2 : array_like
N2 by 3 numpy array of 3-dimensional positions.
Values of each dimension should be between zero and the corresponding dimension
of the input period.
rp_max : array_like
radius of the cylinder to search for neighbors around galaxies in ``data1``.
If a single float is given, ``rp_max`` is assumed to be the same for each galaxy in
``data1``. You may optionally pass in an array of length *Npts1*, in which case
each point in ``data1`` will have its own individual neighbor-search projected radius.
Length units assumed to be in Mpc/h, here and throughout Halotools.
pi_max : array_like
Half-length of cylinder to search for neighbors around galaxies in ``data1``.
If a single float is given, ``pi_max`` is assumed to be the same for each galaxy in
``data1``. You may optionally pass in an array of length *Npts1*, in which case
each point in ``data1`` will have its own individual neighbor-search cylinder half-length.
Length units assumed to be in Mpc/h, here and throughout Halotools.
period : array_like, optional
Length-3 array defining the periodic boundary conditions.
If only one number is specified, the enclosing volume is assumed to
be a periodic cube (by far the most common case).
If period is set to None, the default option,
PBCs are set to infinity.
verbose : Boolean, optional
If True, print out information and progress.
num_threads : int, optional
Number of CPU cores to use in the pair counting.
If ``num_threads`` is set to the string 'max', use all available cores.
Default is 1 thread for a serial calculation that
does not open a multiprocessing pool.
approx_cell1_size : array_like, optional
Length-3 array serving as a guess for the optimal manner by which
the `~halotools.mock_observables.pair_counters.RectangularDoubleMesh`
will apportion the ``data`` points into subvolumes of the simulation box.
The optimum choice unavoidably depends on the specs of your machine.
Default choice is to use 1/10 of the box size in each dimension,
which will return reasonable result performance for most use-cases.
Performance can vary sensitively with this parameter, so it is highly
recommended that you experiment with this parameter when carrying out
performance-critical calculations.
approx_cell2_size : array_like, optional
See comments for ``approx_cell1_size``.
Returns
-------
distance : `~scipy.sparse.coo_matrix`
sparse matrix in COO format containing distances
between the ith entry in ``data1`` and jth in ``data2``.
Examples
--------
For demonstration purposes we create randomly distributed sets of points within a
periodic unit cube.
>>> Npts1, Npts2, Lbox = 1000, 1000, 250.
>>> period = [Lbox, Lbox, Lbox]
>>> rp_max = 1.0
>>> pi_max = 2.0
>>> x1 = np.random.uniform(0, Lbox, Npts1)
>>> y1 = np.random.uniform(0, Lbox, Npts1)
>>> z1 = np.random.uniform(0, Lbox, Npts1)
>>> x2 = np.random.uniform(0, Lbox, Npts2)
>>> y2 = np.random.uniform(0, Lbox, Npts2)
>>> z2 = np.random.uniform(0, Lbox, Npts2)
We transform our *x, y, z* points into the array shape used by the pair-counter by
taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
is used throughout the `~halotools.mock_observables` sub-package:
>>> data1 = np.vstack([x1, y1, z1]).T
>>> data2 = np.vstack([x2, y2, z2]).T
>>> perp_dist_matrix, para_dist_matrix = pairwise_distance_xy_z(data1, data2, rp_max, pi_max, period = period)
"""
# Process the inputs with the helper function
result = _pairwise_distance_xy_z_process_args(data1, data2, rp_max, pi_max, period,
verbose, num_threads, approx_cell1_size, approx_cell2_size)
x1in, y1in, z1in, x2in, y2in, z2in = result[0:6]
rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size = result[6:]
xperiod, yperiod, zperiod = period
search_xlength, search_ylength, search_zlength = max_rp_max, max_rp_max, max_pi_max
# Compute the estimates for the cell sizes
approx_cell1_size, approx_cell2_size = (
_set_approximate_cell_sizes(approx_cell1_size, approx_cell2_size, period)
)
approx_x1cell_size, approx_y1cell_size, approx_z1cell_size = approx_cell1_size
approx_x2cell_size, approx_y2cell_size, approx_z2cell_size = approx_cell2_size
# Build the rectangular mesh
double_mesh = RectangularDoubleMesh(x1in, y1in, z1in, x2in, y2in, z2in,
approx_x1cell_size, approx_y1cell_size, approx_z1cell_size,
approx_x2cell_size, approx_y2cell_size, approx_z2cell_size,
search_xlength, search_ylength, search_zlength, xperiod, yperiod, zperiod, PBCs)
# Create a function object that has a single argument, for parallelization purposes
engine = partial(pairwise_distance_xy_z_engine,
double_mesh, x1in, y1in, z1in, x2in, y2in, z2in, rp_max, pi_max)
# Calculate the cell1 indices that will be looped over by the engine
num_threads, cell1_tuples = _cell1_parallelization_indices(
double_mesh.mesh1.ncells, num_threads)
if num_threads > 1:
pool = multiprocessing.Pool(num_threads)
result = pool.map(engine, cell1_tuples)
pool.close()
else:
result = [engine(cell1_tuples[0])]
# unpack result
d_perp = np.zeros((0,), dtype='float')
d_para = np.zeros((0,), dtype='float')
i_inds = np.zeros((0,), dtype='int')
j_inds = np.zeros((0,), dtype='int')
# unpack the results
for i in range(len(result)):
d_perp = np.append(d_perp, result[i][0])
d_para = np.append(d_para, result[i][1])
i_inds = np.append(i_inds, result[i][2])
j_inds = np.append(j_inds, result[i][3])
return (coo_matrix((d_perp, (i_inds, j_inds)), shape=(len(data1), len(data2))),
coo_matrix((d_para, (i_inds, j_inds)), shape=(len(data1), len(data2))))
def _pairwise_distance_xy_z_process_args(data1, data2, rp_max, pi_max, period,
verbose, num_threads, approx_cell1_size, approx_cell2_size):
"""
helper function to process arguments for `~halotools.mock_observables.pairwise_distance_3d function.
"""
if num_threads is not 1:
if num_threads == 'max':
num_threads = multiprocessing.cpu_count()
if not isinstance(num_threads, int):
msg = "Input ``num_threads`` argument must be an integer or the string 'max'"
raise ValueError(msg)
# Passively enforce that we are working with ndarrays
x1 = data1[:, 0]
y1 = data1[:, 1]
z1 = data1[:, 2]
x2 = data2[:, 0]
y2 = data2[:, 1]
z2 = data2[:, 2]
rp_max = _get_r_max(data1, rp_max)
pi_max = _get_r_max(data1, pi_max)
max_rp_max = np.amax(rp_max)
max_pi_max = np.amax(pi_max)
# Set the boolean value for the PBCs variable
if period is None:
PBCs = False
x1, y1, z1, x2, y2, z2, period = (
_enclose_in_box(x1, y1, z1, x2, y2, z2,
min_size=[max_rp_max*3.0, max_rp_max*3.0, max_pi_max*3.0]))
else:
PBCs = True
period = np.atleast_1d(period).astype(float)
if len(period) == 1:
period = np.array([period[0]]*3)
try:
assert np.all(period < np.inf)
assert np.all(period > 0)
except AssertionError:
msg = "Input ``period`` must be a bounded positive number in all dimensions"
raise ValueError(msg)
if approx_cell1_size is None:
approx_cell1_size = [max_rp_max, max_rp_max, max_pi_max]
elif custom_len(approx_cell1_size) == 1:
approx_cell1_size = [approx_cell1_size, approx_cell1_size, approx_cell1_size]
if approx_cell2_size is None:
approx_cell2_size = [max_rp_max, max_rp_max, max_pi_max]
elif custom_len(approx_cell2_size) == 1:
approx_cell2_size = [approx_cell2_size, approx_cell2_size, approx_cell2_size]
return (x1, y1, z1, x2, y2, z2,
rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs,
approx_cell1_size, approx_cell2_size)