Source code for halotools.mock_observables.isolation_functions.cylindrical_isolation

"""
Module containing the `~halotools.mock_observables.cylindrical_isolation` function
used to apply a simple 3d isolation criteria to a set of points in a periodic box.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
from functools import partial
import multiprocessing

from .isolation_functions_helpers import _get_r_max, _set_isolation_approx_cell_sizes
from .engines import cylindrical_isolation_engine

from ..mock_observables_helpers import enforce_sample_respects_pbcs, get_num_threads, get_period

from ..pair_counters.rectangular_mesh import RectangularDoubleMesh
from ..pair_counters.mesh_helpers import (
    _set_approximate_cell_sizes, _cell1_parallelization_indices, _enclose_in_box,
    _enforce_maximum_search_length)

__all__ = ('cylindrical_isolation', )

__author__ = ['Andrew Hearin', 'Duncan Campbell']

np.seterr(divide='ignore', invalid='ignore')  # ignore divide by zero


[docs] def cylindrical_isolation(sample1, sample2, rp_max, pi_max, period=None, num_threads=1, approx_cell1_size=None, approx_cell2_size=None): """ Determine whether a set of points, ``sample1``, is isolated, i.e. does not have a neighbor in ``sample2`` within an user specified cylindrical volume centered at each point in ``sample1``. See also :ref:`galaxy_catalog_analysis_tutorial10` for example usage on a mock galaxy catalog. Parameters ---------- sample1 : array_like *Npts1 x 3* numpy array containing 3-D positions of points. See the :ref:`mock_obs_pos_formatting` documentation page, or the Examples section below, for instructions on how to transform your coordinate position arrays into the format accepted by the ``sample1`` and ``sample2`` arguments. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. sample2 : array_like *Npts2 x 3* numpy array containing 3-D positions of points. rp_max : array_like radius of the cylinder to search for neighbors around galaxies in ``sample1``. If a single float is given, ``rp_max`` is assumed to be the same for each galaxy in ``sample1``. You may optionally pass in an array of length *Npts1*, in which case each point in ``sample1`` will have its own individual neighbor-search projected radius. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. pi_max : array_like Half-length of cylinder to search for neighbors around galaxies in ``sample1``. If a single float is given, ``pi_max`` is assumed to be the same for each galaxy in ``sample1``. You may optionally pass in an array of length *Npts1*, in which case each point in ``sample1`` will have its own individual neighbor-search cylinder half-length. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. period : array_like, optional Length-3 sequence defining the periodic boundary conditions in each dimension. If you instead provide a single scalar, Lbox, period is assumed to be the same in all Cartesian directions. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools. num_threads : int, optional Number of threads to use in calculation, where parallelization is performed using the python ``multiprocessing`` module. Default is 1 for a purely serial calculation, in which case a multiprocessing Pool object will never be instantiated. A string 'max' may be used to indicate that the pair counters should use all available cores on the machine. approx_cell1_size : array_like, optional Length-3 array serving as a guess for the optimal manner by how points will be apportioned into subvolumes of the simulation box. The optimum choice unavoidably depends on the specs of your machine. Default choice is to use ``rp_max``/10 in the xy-dimensions and ``pi_max``/10 in the z-dimension, which will return reasonable result performance for most use-cases. Performance can vary sensitively with this parameter, so it is highly recommended that you experiment with this parameter when carrying out performance-critical calculations. approx_cell2_size : array_like, optional Analogous to ``approx_cell1_size``, but for ``sample2``. See comments for ``approx_cell1_size`` for details. Returns ------- is_isolated : numpy.array array of booleans indicating if each point in `sample1` is isolated. Examples -------- First we create a randomly distributed set of points ``sample1``, together with random z-velocities for those points. We will then place ``sample1`` into redshift-space using the `~halotools.mock_observables.return_xyz_formatted_array` function. We will use the `~halotools.mock_observables.cylindrical_isolation` function to determine which points in ``sample1`` have zero neighbors inside a cylinder of radius ``rp_max`` and half-length ``pi_max``. >>> Npts = 1000 >>> Lbox = 250.0 >>> period = Lbox >>> x = np.random.uniform(0, Lbox, Npts) >>> y = np.random.uniform(0, Lbox, Npts) >>> z = np.random.uniform(0, Lbox, Npts) >>> vz = np.random.normal(loc = 0, scale = 100, size = Npts) We place our points into redshift-space, formatting the result into the appropriately shaped array used throughout the `~halotools.mock_observables` sub-package: >>> from halotools.mock_observables import return_xyz_formatted_array >>> sample1 = return_xyz_formatted_array(x, y, z, period = Lbox, velocity = vz, velocity_distortion_dimension='z') Now we will call `cylindrical_isolation` with ``sample2`` set to ``sample1``, applying a projected separation cut of 500 kpc/h, and a line-of-sight velocity cut of 750 km/s. Note that Halotools assumes *h=1* throughout the package, and that all Halotools length-units are in Mpc/h. >>> rp_max = 0.5 # 500 kpc/h cut in perpendicular direction Since *h=1* implies :math:`H_{0} = 100` km/s/Mpc, our 750 km/s velocity criteria gets transformed into a z-dimension length criteria as: >>> H0 = 100.0 >>> pi_max = 750./H0 >>> is_isolated = cylindrical_isolation(sample1, sample1, rp_max, pi_max, period=period) In the next example that follows, ``sample2`` will be a different set of points from ``sample1``, so we will determine which points in ``sample1`` have a neighbor in ``sample2`` located inside a cylinder of radius ``rp_max`` and half-length ``pi_max``. >>> x2 = np.random.uniform(0, Lbox, Npts) >>> y2 = np.random.uniform(0, Lbox, Npts) >>> z2 = np.random.uniform(0, Lbox, Npts) >>> vz2 = np.random.normal(loc = 0, scale = 100, size = Npts) >>> sample2 = return_xyz_formatted_array(x2, y2, z2, period = Lbox, velocity = vz2, velocity_distortion_dimension='z') >>> is_isolated = cylindrical_isolation(sample1, sample2, rp_max, pi_max, period=period) Notes ----- There is one edge-case of all the isolation criteria functions worthy of special mention. Suppose there exists a point *p* in ``sample1`` with the exact same spatial coordinates as one or more points in ``sample2``. The matching point(s) in ``sample2`` will **not** be considered neighbors of *p*. """ # Process the inputs with the helper function result = _cylindrical_isolation_process_args(sample1, sample2, rp_max, pi_max, period, num_threads, approx_cell1_size, approx_cell2_size) x1in, y1in, z1in, x2in, y2in, z2in = result[0:6] rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size = result[6:] xperiod, yperiod, zperiod = period search_xlength, search_ylength, search_zlength = max_rp_max, max_rp_max, max_pi_max # Compute the estimates for the cell sizes approx_cell1_size, approx_cell2_size = ( _set_approximate_cell_sizes(approx_cell1_size, approx_cell2_size, period) ) approx_x1cell_size, approx_y1cell_size, approx_z1cell_size = approx_cell1_size approx_x2cell_size, approx_y2cell_size, approx_z2cell_size = approx_cell2_size # Build the rectangular mesh double_mesh = RectangularDoubleMesh(x1in, y1in, z1in, x2in, y2in, z2in, approx_x1cell_size, approx_y1cell_size, approx_z1cell_size, approx_x2cell_size, approx_y2cell_size, approx_z2cell_size, search_xlength, search_ylength, search_zlength, xperiod, yperiod, zperiod, PBCs) # Create a function object that has a single argument, for parallelization purposes engine = partial(cylindrical_isolation_engine, double_mesh, x1in, y1in, z1in, x2in, y2in, z2in, rp_max, pi_max) # Calculate the cell1 indices that will be looped over by the engine num_threads, cell1_tuples = _cell1_parallelization_indices( double_mesh.mesh1.ncells, num_threads) if num_threads > 1: pool = multiprocessing.Pool(num_threads) result = pool.map(engine, cell1_tuples) counts = np.sum(np.array(result), axis=0) pool.close() else: counts = engine(cell1_tuples[0]) is_isolated = np.array(counts, dtype=bool) return is_isolated
def _cylindrical_isolation_process_args(sample1, sample2, rp_max, pi_max, period, num_threads, approx_cell1_size, approx_cell2_size): """ private function to process the arguents for cylindrical isolation functions """ num_threads = get_num_threads(num_threads) rp_max = _get_r_max(sample1, rp_max) pi_max = _get_r_max(sample1, pi_max) max_rp_max = np.amax(rp_max) max_pi_max = np.amax(pi_max) period, PBCs = get_period(period) # At this point, period may still be set to None, # in which case we must remap our points inside the smallest enclosing cube # and set ``period`` equal to this cube size. if period is None: x1, y1, z1, x2, y2, z2, period = ( _enclose_in_box( sample1[:, 0], sample1[:, 1], sample1[:, 2], sample2[:, 0], sample2[:, 1], sample2[:, 2], min_size=[max_rp_max*3.0, max_rp_max*3.0, max_pi_max*3.0])) else: x1 = sample1[:, 0] y1 = sample1[:, 1] z1 = sample1[:, 2] x2 = sample2[:, 0] y2 = sample2[:, 1] z2 = sample2[:, 2] _enforce_maximum_search_length(max_rp_max, period[0]) _enforce_maximum_search_length(max_rp_max, period[1]) _enforce_maximum_search_length(max_pi_max, period[2]) enforce_sample_respects_pbcs(x1, y1, z1, period) enforce_sample_respects_pbcs(x2, y2, z2, period) approx_cell1_size, approx_cell2_size = _set_isolation_approx_cell_sizes( approx_cell1_size, approx_cell2_size, max_rp_max, max_rp_max, max_pi_max) return (x1, y1, z1, x2, y2, z2, rp_max, max_rp_max, pi_max, max_pi_max, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size)