Source code for halotools.mock_observables.catalog_analysis_helpers

r""" Common functions used when analyzing catalogs of galaxies/halos.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
from scipy.stats import binned_statistic

from ..custom_exceptions import HalotoolsError
from ..empirical_models import enforce_periodicity_of_box
from ..sim_manager.sim_defaults import default_cosmology, default_redshift

__all__ = (
    "mean_y_vs_x",
    "return_xyz_formatted_array",
    "cuboid_subvolume_labels",
    "relative_positions_and_velocities",
    "sign_pbc",
    "apply_zspace_distortion",
)
__author__ = ["Andrew Hearin"]



[docs]
def mean_y_vs_x(x, y, error_estimator="error_on_mean", **kwargs):
    r"""
    Estimate the mean value of the property *y* as a function of *x*
    for an input sample of galaxies/halos,
    optionally returning an error estimate.

    The `mean_y_vs_x` function is just a convenience wrapper
    around `scipy.stats.binned_statistic` and `np.histogram`.

    See also :ref:`galaxy_catalog_analysis_tutorial1`.

    Parameters
    -----------
    x : array_like
        Array storing values of the independent variable of the sample.

    y : array_like
        Array storing values of the dependent variable of the sample.

    bins : array_like, optional
        Bins of the input *x*.
        Defaults are set by `scipy.stats.binned_statistic`.

    error_estimator : string, optional
        If set to ``error_on_mean``, function will also return an array storing
        :math:`\sigma_{y}/\sqrt{N}`, where :math:`\sigma_{y}` is the
        standard deviation of *y* in the bin
        and :math:`\sqrt{N}` is the counts in each bin.

        If set to ``variance``, function will also return an array storing
        :math:`\sigma_{y}`.

        Default is ``error_on_mean``

    Returns
    ----------
    bin_midpoints : array_like
        Midpoints of the *x*-bins.

    mean : array_like
        Mean of *y* estimated in bins

    err : array_like
        Error on *y* estimated in bins

    Examples
    ---------
    >>> from halotools.sim_manager import FakeSim
    >>> halocat = FakeSim()
    >>> halos = halocat.halo_table
    >>> halo_mass, mean_spin, err = mean_y_vs_x(halos['halo_mvir'], halos['halo_spin'])

    See also
    ---------
    :ref:`galaxy_catalog_analysis_tutorial1`

    """
    try:
        assert error_estimator in ("error_on_mean", "variance")
    except AssertionError:
        msg = (
            "\nInput ``error_estimator`` must be either "
            "``error_on_mean`` or ``variance``\n"
        )
        raise HalotoolsError(msg)

    modified_kwargs = {key: kwargs[key] for key in kwargs if key != "error_estimator"}
    result = binned_statistic(x, y, statistic="mean", **modified_kwargs)
    mean, bin_edges, binnumber = result
    bin_midpoints = (bin_edges[1:] + bin_edges[:-1]) / 2.0

    modified_kwargs["bins"] = bin_edges

    result = binned_statistic(x, y, statistic=np.std, **modified_kwargs)
    variance, _, _ = result

    if error_estimator == "variance":
        err = variance
    else:
        counts = np.histogram(x, bins=bin_edges)
        err = variance / np.sqrt(counts[0])

    return bin_midpoints, mean, err




[docs]
def return_xyz_formatted_array(
    x,
    y,
    z,
    period=np.inf,
    cosmology=default_cosmology,
    redshift=default_redshift,
    **kwargs
):
    r"""Returns a Numpy array of shape *(Npts, 3)* storing the
    xyz-positions in the format used throughout
    the `~halotools.mock_observables` package, optionally applying redshift-space
    distortions according to the input ``velocity``, ``redshift`` and ``cosmology``.

    See :ref:`mock_obs_pos_formatting` for a tutorial.

    Parameters
    -----------
    x, y, z : sequence of length-Npts arrays
        Comoving units of Mpc assuming h=1, as throughout Halotools.

    velocity : array, optional
        Length-Npts array of velocities in *physical* units of km/s
        used to apply peculiar velocity distortions, e.g.,
        :math:`z_{\rm dist} = z_{\rm true} + v_{\rm z}/aH`,
        where *a* and *H* are the scale factor and Hubble expansion rate
        evaluated at the input ``redshift``.

        If ``velocity`` argument is passed,
        ``velocity_distortion_dimension`` must also be passed.

    velocity_distortion_dimension : string, optional
        If set to ``'x'``, ``'y'`` or ``'z'``,
        the requested dimension in the returned ``pos`` array
        will be distorted due to peculiar motion.
        For example, if ``velocity_distortion_dimension`` is ``z``,
        then ``pos`` can be treated as physically observed
        galaxy positions under the distant-observer approximation.
        Default is no distortions.

    cosmology : astropy.cosmology.Cosmology, optional
        Cosmology to assume when applying redshift-space distortions,
        e.g., the cosmology of the simulation.
        Default is set in `sim_manager.sim_defaults`.

    redshift : float, optional
        Redshift of the mock galaxy sample,
        e.g., the redshift of the simulation snapshot.
        Default is set in `sim_manager.sim_defaults`.

    mask : array_like, optional
        Boolean mask that can be used to select the positions
        of a subcollection of the galaxies stored in the ``galaxy_table``.

    period : array_like, optional
        Length-3 sequence defining the periodic boundary conditions
        in each dimension. If you instead provide a single scalar,
        period is assumed to be the same in all Cartesian directions.
        If period is not np.inf, then after applying peculiar velocity distortions
        the new coordinates will be remapped into the periodic box.
        Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.

    Returns
    --------
    pos : array_like
        Numpy array with shape *(Npts, 3)* with units of comoving Mpc/h.

    Examples
    ---------
    >>> npts = 100
    >>> Lbox = 250.
    >>> x = np.random.uniform(0, Lbox, npts)
    >>> y = np.random.uniform(0, Lbox, npts)
    >>> z = np.random.uniform(0, Lbox, npts)
    >>> pos = return_xyz_formatted_array(x, y, z, period=Lbox)

    Now we will define an array of random velocities that we will use
    to apply z-space distortions to the z-dimension, assuming the mock galaxy
    sample is at the default redshift. For our random velocities
    we'll assume the values are drawn from a Gaussian centered at zero
    using `numpy.random.normal`.

    >>> velocity = np.random.normal(loc=0, scale=100, size=npts)
    >>> pos = return_xyz_formatted_array(x, y, z, period=Lbox, velocity=velocity, velocity_distortion_dimension='z')

    If we wanted to introduce redshift-space distortions at some higher redshift:

    >>> pos = return_xyz_formatted_array(x, y, z, period=Lbox, velocity=velocity, velocity_distortion_dimension='z', redshift=1.5)

    Notes
    -----
    See :ref:`zspace_distortion_derivation`.

    """
    period = np.atleast_1d(period)
    if len(period) == 1:
        period = np.repeat(period, 3)
    elif len(period) == 3:
        pass
    else:
        msg = "Input ``period`` must be a single float or a 3-element sequence"
        raise ValueError(msg)

    x = np.mod(x, period[0])
    y = np.mod(y, period[1])
    z = np.mod(z, period[2])

    posdict = {"x": np.copy(x), "y": np.copy(y), "z": np.copy(z)}
    period_dict = {"x": period[0], "y": period[1], "z": period[2]}

    a = "velocity_distortion_dimension" in list(kwargs.keys())
    b = "velocity" in list(kwargs.keys())
    if bool(a + b) is True:
        if bool(a * b) is False:
            msg = (
                "You must either both or none of the following keyword arguments: "
                "``velocity_distortion_dimension`` and ``velocity``\n"
            )
            raise KeyError(msg)
        else:
            vel_dist_dim = kwargs["velocity_distortion_dimension"]
            velocity = np.copy(kwargs["velocity"])
            apply_distortion = True
    else:
        apply_distortion = False

    if apply_distortion is True:
        try:
            assert vel_dist_dim in ("x", "y", "z")
            spatial_distortion = (
                (1.0 + redshift) * np.copy(velocity) / 100.0 / cosmology.efunc(redshift)
            )
            posdict[vel_dist_dim] = np.copy(posdict[vel_dist_dim]) + spatial_distortion
            Lbox = period_dict[vel_dist_dim]
            if Lbox != np.inf:
                posdict[vel_dist_dim] = enforce_periodicity_of_box(
                    posdict[vel_dist_dim], Lbox
                )
        except AssertionError:
            msg = (
                "\nInput ``velocity_distortion_dimension`` must be either \n"
                "``'x'``, ``'y'`` or ``'z'``."
            )
            raise KeyError(msg)

    xout, yout, zout = (
        np.copy(posdict["x"]),
        np.copy(posdict["y"]),
        np.copy(posdict["z"]),
    )
    pos = np.vstack([xout, yout, zout]).T

    # Apply a mask, if applicable
    try:
        mask = kwargs["mask"]
        return pos[mask]
    except KeyError:
        return pos




[docs]
def apply_zspace_distortion(
    true_pos, peculiar_velocity, redshift, cosmology, Lbox=None
):
    r"""Apply redshift-space distortions to the comoving simulation coordinate,
    optionally accounting for periodic boundary conditions.

    This function implements the following formula:

    .. math::

        s_{\rm com}^{\rm z-space} = s_{\rm com}^{\rm true} + \frac{1 + z}{H(z)}v_{\rm pec}

    See :ref:`zspace_distortion_derivation` to see where this formula comes from.

    Parameters
    ----------
    true_pos : ndarray
        Array of shape (npts, ) storing the line-of-sight position in comoving Mpc/h.
        In most cases ``true_pos`` is the z-coordinate of the simulation.

    peculiar_velocity : ndarray
        Array of shape (npts, ) storing the peculiar velocity in physical km/s.
        In most cases ``peculiar_velocity`` is the z-velocity of the simulation.

    redshift : float or ndarray
        Float or ndarray of shape (npts, ) storing the redshift of the object.
        If using a single snapshot, this argument is a single float equal to the
        redshift of the snapshot. If using a lightcone, this argument is the
        redshift of each point.

    cosmology : astropy.cosmology.Cosmology
        Cosmology to assume when applying redshift-space distortions,
        e.g., the cosmology of the simulation.

    Lbox : float, optional
        Box length of the simulation so that periodic boundary conditions
        can be applied. Default behavior is None, in which case PBCs will be ignored.

    Returns
    -------
    zspace_pos : ndarray
        Array of shape (npts, ) storing the z-space coordinates in comoving Mpc/h

    Examples
    --------
    >>> from halotools.sim_manager import FakeSim
    >>> halocat = FakeSim()
    >>> true_pos = halocat.halo_table['halo_z']
    >>> peculiar_velocity = halocat.halo_table['halo_vz']
    >>> redshift = halocat.redshift
    >>> cosmology = halocat.cosmology
    >>> Lbox = halocat.Lbox[2]
    >>> zspace_zcoord = apply_zspace_distortion(true_pos, peculiar_velocity, redshift, cosmology, Lbox)
    """
    scale_factor = 1.0 / (1.0 + redshift)
    pos_err = peculiar_velocity / 100.0 / cosmology.efunc(redshift) / scale_factor
    zspace_pos = true_pos + pos_err
    if Lbox is not None:
        zspace_pos = enforce_periodicity_of_box(zspace_pos, Lbox)
    return zspace_pos




[docs]
def cuboid_subvolume_labels(sample, Nsub, Lbox):
    r"""
    Return integer labels indicating which cubical subvolume of a larger cubical volume a
    set of points occupy.

    Parameters
    ----------
    sample : array_like
        Npts x 3 numpy array containing 3-D positions of points.

    Nsub : array_like
        Length-3 numpy array of integers indicating how many times to split the volume
        along each dimension.  If a single integer, N, is supplied, ``Nsub`` is set to
        [N,N,N], and the volume is split along each dimension N times.  The total number
        of subvolumes is given by numpy.prod(Nsub).

    Lbox : array_like
        Length-3 numpy array definging the lengths of the sides of the cubical volume
        that ``sample`` occupies.  If only a single scalar is specified, the volume is assumed
        to be a cube with side-length Lbox

    Returns
    -------
    labels : numpy.array
        (Npts, ) numpy array with integer labels in the range [1,numpy.prod(Nsub)] indicating
        the subvolume each point in ``sample`` occupies.

    N_sub_vol : int
       number of subvolumes.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points within a
    periodic unit cube.

    >>> Npts = 1000
    >>> Lbox = 1.0
    >>> period = np.array([Lbox,Lbox,Lbox])

    >>> x = np.random.random(Npts)
    >>> y = np.random.random(Npts)
    >>> z = np.random.random(Npts)

    We transform our *x, y, z* points into the array shape used by the pair-counter by
    taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
    is used throughout the `~halotools.mock_observables` sub-package:

    >>> sample = np.vstack((x,y,z)).T

    Divide the volume into cubes with length 0.25 on a side.

    >>> Nsub = [4,4,4]
    >>> labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
    """

    # process inputs and check for consistency
    sample = np.atleast_1d(sample).astype("f8")
    try:
        assert sample.ndim == 2
        assert sample.shape[1] == 3
    except AssertionError:
        msg = "Input ``sample`` must have shape (Npts, 3)"
        raise TypeError(msg)

    Nsub = np.atleast_1d(Nsub).astype("i4")
    if len(Nsub) == 1:
        Nsub = np.array([Nsub[0], Nsub[0], Nsub[0]])
    elif len(Nsub) != 3:
        msg = "Input ``Nsub`` must be a scalar or length-3 sequence"
        raise TypeError(msg)

    Lbox = np.atleast_1d(Lbox).astype("f8")
    if len(Lbox) == 1:
        Lbox = np.array([Lbox[0]] * 3)
    elif len(Lbox) != 3:
        msg = "Input ``Lbox`` must be a scalar or length-3 sequence"
        raise TypeError(msg)

    dL = Lbox / Nsub  # length of subvolumes along each dimension
    N_sub_vol = int(np.prod(Nsub))  # total the number of subvolumes
    # create an array of unique integer IDs for each subvolume
    inds = np.arange(1, N_sub_vol + 1).reshape(Nsub[0], Nsub[1], Nsub[2])

    # tag each particle with an integer indicating which subvolume it is in
    index = np.floor(sample / dL).astype(int)
    # take care of the case where a point falls on the boundary
    for i in range(3):
        index[:, i] = np.where(index[:, i] == Nsub[i], Nsub[i] - 1, index[:, i])
    index = inds[index[:, 0], index[:, 1], index[:, 2]].astype(int)

    return index, int(N_sub_vol)




[docs]
def sign_pbc(x1, x2, period=None, equality_fill_val=0.0, return_pbc_correction=False):
    r"""Return the sign of the unit vector pointing from x2 towards x1,
    that is, the sign of (x1 - x2), accounting for periodic boundary conditions.

    If x1 > x2, returns 1. If x1 < x2, returns -1. If x1 == x2, returns equality_fill_val.

    Parameters
    ----------
    x1 : array
        1-d array of length *Npts*.
        If period is not None, all values must be contained in [0, Lbox)

    x2 : array
        1-d array of length *Npts*.
        If period is not None, all values must be contained in [0, Lbox)

    period : float, optional
        Size of the periodic box. Default is None for non-periodic case.

    equality_fill_val : float, optional
        Value to return for cases where x1 == x2. Default is 0.

    return_pbc_correction : bool, optional
        If True, the `sign_pbc` function will additionally return a
        length *Npts* boolean array storing whether or not the input
        points had a PBC correction applied. Default is False.

    Returns
    -------
    sgn : array
        1-d array of length *Npts*.

    Examples
    --------
    >>> Lbox = 250.0
    >>> x1 = 1.
    >>> x2 = 249.
    >>> result = sign_pbc(x1, x2, period=Lbox)
    >>> assert result == 1

    >>> result = sign_pbc(x1, x2, period=None)
    >>> assert result == -1

    >>> npts = 100
    >>> x1 = np.random.uniform(0, Lbox, npts)
    >>> x2 = np.random.uniform(0, Lbox, npts)
    >>> result = sign_pbc(x1, x2, period=Lbox)
    """
    x1 = np.atleast_1d(x1)
    x2 = np.atleast_1d(x2)
    result = np.sign(x1 - x2)

    if period is not None:
        try:
            assert np.all(x1 >= 0)
            assert np.all(x2 >= 0)
            assert np.all(x1 < period)
            assert np.all(x2 < period)
        except AssertionError:
            msg = "If period is not None, all values of x and y must be between [0, period)"
            raise ValueError(msg)

        d = np.abs(x1 - x2)
        pbc_correction = np.sign(period / 2.0 - d)
        result = pbc_correction * result

    if equality_fill_val != 0:
        result = np.where(result == 0, equality_fill_val, result)

    if return_pbc_correction:
        return result, pbc_correction
    else:
        return result




[docs]
def relative_positions_and_velocities(x1, x2, period=None, **kwargs):
    r"""Return the vector pointing from x2 towards x1,
    that is, x1 - x2, accounting for periodic boundary conditions.

    If keyword arguments ``v1`` and ``v2`` are passed in,
    additionally return the velocity ``v1`` with respect to ``v2``, with sign convention
    such that positive (negative) values correspond to receding (approaching) points.

    Parameters
    -----------
    x1 : array
        1-d array of length *Npts*.
        If period is not None, all values must be contained in [0, Lbox)

    x2 : array
        1-d array of length *Npts*.
        If period is not None, all values must be contained in [0, Lbox)

    period : float, optional
        Size of the periodic box. Default is None for non-periodic case.

    Returns
    --------
    xrel : array
        1-d array of length *Npts* storing x1 - x2.
        If *x1 > x2* and abs(*x1* - *x2*) > period/2, the sign of *d* will be negative.

    vrel : array, optional
        1-d array of length *Npts* storing v1 relative to v2.
        Only returned if ``v1`` and ``v2`` are passed in.

    Examples
    --------
    >>> Lbox = 250.0
    >>> x1 = 1.
    >>> x2 = 249.
    >>> result = relative_positions_and_velocities(x1, x2, period=Lbox)
    >>> assert np.isclose(result, 2)

    >>> result = relative_positions_and_velocities(x1, x2, period=None)
    >>> assert np.isclose(result, -248)

    >>> npts = 100
    >>> x1 = np.random.uniform(0, Lbox, npts)
    >>> x2 = np.random.uniform(0, Lbox, npts)
    >>> result = relative_positions_and_velocities(x1, x2, period=Lbox)

    Now let's frame this result in terms of a physically motivated example.
    Suppose we have a central galaxy with position *xc* and velocity *vc*,
    and a satellite galaxy with position *xs* and velocity *vs*.
    We can calculate the vector pointing from the central to the satellite,
    as well as the satellites's host-centric velocity:

    >>> xcen, vcen = 249.9, 100
    >>> xsat, vsat = 0.1, -300
    >>> xrel, vrel = relative_positions_and_velocities(xsat, xcen, v1=vsat, v2=vcen, period=Lbox)
    >>> assert np.isclose(xrel, +0.2)
    >>> assert np.isclose(vrel, -400)

    >>> xcen, vcen = 0.1, 100
    >>> xsat, vsat = 249.9, -300
    >>> xrel, vrel = relative_positions_and_velocities(xsat, xcen, v1=vsat, v2=vcen, period=Lbox)
    >>> assert np.isclose(xrel, -0.2)
    >>> assert np.isclose(vrel, +400)

    """
    s = sign_pbc(x1, x2, period=period, equality_fill_val=1.0)
    absd = np.abs(x1 - x2)
    if period is None:
        xrel = s * absd
    else:
        xrel = s * np.where(absd > period / 2.0, period - absd, absd)

    try:
        v1 = kwargs["v1"]
        v2 = kwargs["v2"]
        return xrel, s * (v1 - v2)
    except KeyError:
        return xrel
Navigation

Source code for halotools.mock_observables.catalog_analysis_helpers