Source code for halotools.mock_observables.two_point_clustering.angular_tpcf

r"""
Module containing the `~halotools.mock_observables.angular_tpcf` function used to
calculate galaxy clustering as a function of angular separation.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

from .tpcf_estimators import _TP_estimator_requirements, _TP_estimator
from .tpcf_estimators import _TP_estimator_crossx

from .clustering_helpers import verify_tpcf_estimator, process_optional_input_sample2


from ..pair_counters import npairs_3d
from ..mock_observables_helpers import get_num_threads

from ...utils.spherical_geometry import spherical_to_cartesian, chord_to_cartesian
from ...custom_exceptions import HalotoolsError
from ...utils.array_utils import array_is_monotonic

__all__ = ["angular_tpcf"]
__author__ = ["Duncan Campbell"]

np.seterr(divide="ignore", invalid="ignore")  # ignore divide by zero in e.g. DD/RR



[docs]
def angular_tpcf(
    sample1,
    theta_bins,
    sample2=None,
    randoms=None,
    do_auto=True,
    do_cross=True,
    estimator="Natural",
    num_threads=1,
):
    r"""
    Calculate the angular two-point correlation function, :math:`w(\theta)`.

    Example calls to this function appear in the documentation below.
    See the :ref:`mock_obs_pos_formatting` documentation page for
    instructions on how to transform your coordinate position arrays into the
    format accepted by the ``sample1`` argument.

    For a step-by-step tutorial, see :ref:`galaxy_catalog_analysis_tutorial9`.

    Parameters
    ----------
    sample1 : array_like
        Npts1 x 2 numpy array containing ra,dec positions of points in degrees.

    theta_bins : array_like
        array of boundaries defining the angular distance bins in which pairs are
        counted.

    sample2 : array_like, optional
        Npts2 x 2 array containing ra,dec positions of points in degrees.

    randoms : array_like, optional
        Nran x 2 array containing ra,dec positions of points in degrees.  If no randoms
        are provided analytic randoms are used (only valid for for continuous all-sky
        coverage).

    do_auto : boolean, optional
        Boolean determines whether the auto-correlation function will
        be calculated and returned. Default is True.

    do_cross : boolean, optional
        Boolean determines whether the cross-correlation function will
        be calculated and returned. Only relevant when ``sample2`` is also provided.
        Default is True for the case where ``sample2`` is provided, otherwise False.

    estimator : string, optional
        Statistical estimator for the tpcf.
        Options are 'Natural', 'Davis-Peebles', 'Hewett' , 'Hamilton', 'Landy-Szalay'
        Default is ``Natural``.

    num_threads : int, optional
        Number of threads to use in calculation, where parallelization is performed
        using the python ``multiprocessing`` module. Default is 1 for a purely serial
        calculation, in which case a multiprocessing Pool object will
        never be instantiated. A string 'max' may be used to indicate that
        the pair counters should use all available cores on the machine.

    Returns
    -------
    correlation_function(s) : numpy.array
        *len(theta_bins)-1* length array containing the correlation function
        :math:`w(\theta)` computed in each of the bins defined by input ``theta_bins``.

        .. math::
            1 + w(\theta) \equiv \mathrm{DD}(\theta) / \mathrm{RR}(\theta),

        If ``estimator`` is set to 'Natural'.  :math:`\mathrm{DD}(\theta)` is the number
        of sample pairs with separations equal to :math:`\theta`, calculated by the pair
        counter.  :math:`\mathrm{RR}(\theta)` is the number of random pairs with
        separations equal to :math:`\theta`, and is counted internally using
        "analytic randoms" if ``randoms`` is set to None (see notes for an explanation),
        otherwise it is calculated using the pair counter.

        If ``sample2`` is passed as input
        (and if ``sample2`` is not exactly the same as ``sample1``),
        then three arrays of length *len(rbins)-1* are returned:

        .. math::
            w_{11}(\theta), w_{12}(\theta), w_{22}(\theta),

        the autocorrelation of ``sample1``, the cross-correlation between ``sample1`` and
        ``sample2``, and the autocorrelation of ``sample2``, respectively.
        If ``do_auto`` or ``do_cross`` is set to False,
        the appropriate sequence of results is returned.

    Notes
    -----
    Pairs are counted using `~halotools.mock_observables.npairs_3d`.

    Examples
    --------
    For demonstration purposes we create a randomly distributed set of points on the
    sky:

    >>> from halotools.utils import sample_spherical_surface
    >>> Npts = 1000
    >>> angular_coords = sample_spherical_surface(Npts) #in degrees

    >>> theta_bins = np.logspace(-2,1,10)
    >>> w = angular_tpcf(angular_coords, theta_bins)

    See also
    --------
    :ref:`galaxy_catalog_analysis_tutorial9`
    """

    # check input arguments using clustering helper functions
    function_args = (
        sample1,
        theta_bins,
        sample2,
        randoms,
        do_auto,
        do_cross,
        estimator,
        num_threads,
    )

    # pass arguments in, and get out processed arguments, plus some control flow variables
    (
        sample1,
        theta_bins,
        sample2,
        randoms,
        do_auto,
        do_cross,
        num_threads,
        _sample1_is_sample2,
    ) = _angular_tpcf_process_args(*function_args)

    # convert angular bins to coord lengths on a unit sphere
    chord_bins = chord_to_cartesian(theta_bins, radians=False)

    # convert samples and randoms to cartesian coordinates (x,y,z) on a unit sphere
    x, y, z = spherical_to_cartesian(sample1[:, 0], sample1[:, 1])
    sample1 = np.vstack((x, y, z)).T
    if _sample1_is_sample2:
        sample2 = sample1
    else:
        x, y, z = spherical_to_cartesian(sample2[:, 0], sample2[:, 1])
        sample2 = np.vstack((x, y, z)).T
    if randoms is not None:
        x, y, z = spherical_to_cartesian(randoms[:, 0], randoms[:, 1])
        randoms = np.vstack((x, y, z)).T

    def random_counts(
        sample1,
        sample2,
        randoms,
        chord_bins,
        num_threads,
        do_RR,
        do_DR,
        _sample1_is_sample2,
    ):
        """
        Count random pairs.
        """

        def area_spherical_cap(chord):
            """
            Calculate the area of a spherical cap on a unit sphere given the chord length
            """
            h = 1.0 - np.sqrt(1.0 - chord**2)
            return np.pi * (chord**2 + h**2)

        # randoms provided, so calculate random pair counts.
        if randoms is not None:
            if do_RR is True:
                RR = npairs_3d(randoms, randoms, chord_bins, num_threads=num_threads)
                RR = np.diff(RR)
            else:
                RR = None
            if do_DR is True:
                D1R = npairs_3d(sample1, randoms, chord_bins, num_threads=num_threads)
                D1R = np.diff(D1R)
            else:
                D1R = None
            if _sample1_is_sample2:
                D2R = None
            else:
                if do_DR is True:
                    D2R = npairs_3d(
                        sample2, randoms, chord_bins, num_threads=num_threads
                    )
                    D2R = np.diff(D2R)
                else:
                    D2R = None
            return D1R, D2R, RR
        elif randoms is None:

            # set the number of randoms equal to the number of points in sample1
            # cbx: Maybe stupid question but why not use much more? Less noise at no cost right?
            NR = len(sample1)

            # do area calculations
            da = area_spherical_cap(chord_bins)
            da = np.diff(da)
            global_area = 4.0 * np.pi  # surface area of a unit sphere

            # calculate randoms for sample1
            N1 = np.shape(sample1)[0]  # number of points in sample1
            rho1 = N1 / global_area  # number density of points
            D1R = (N1) * (da * rho1)  # random counts are N**2*dv*rho

            N2 = np.shape(sample2)[0]
            rho2 = N2 / global_area  # number density of points
            D2R = N2 * (da * rho2)

            # calculate the random-random pairs.
            # cbx: This should be NR(NR-1) if we are doing an autocorr I think? Maybe it doesn't matter.
            rhor = NR**2 / global_area
            RR = da * rhor

            return D1R, D2R, RR

    def pair_counts(
        sample1,
        sample2,
        chord_bins,
        num_threads,
        do_auto,
        do_cross,
        _sample1_is_sample2,
    ):
        """
        Count data-data pairs.
        """

        if do_auto is True:
            D1D1 = npairs_3d(sample1, sample1, chord_bins, num_threads=num_threads)
            D1D1 = np.diff(D1D1)
        else:
            D1D1 = None
            D2D2 = None

        if _sample1_is_sample2:
            D1D2 = D1D1
            D2D2 = D1D1
        else:
            if do_cross is True:
                D1D2 = npairs_3d(sample1, sample2, chord_bins, num_threads=num_threads)
                D1D2 = np.diff(D1D2)
            else:
                D1D2 = None
            if do_auto is True:
                D2D2 = npairs_3d(sample2, sample2, chord_bins, num_threads=num_threads)
                D2D2 = np.diff(D2D2)
            else:
                D2D2 = None

        return D1D1, D1D2, D2D2

    # What needs to be done?
    do_DD, do_DR, do_RR = _TP_estimator_requirements(estimator)

    # How many points are there (for normalization purposes)?
    N1 = len(sample1)
    N2 = len(sample2)
    if randoms is not None:
        NR = len(randoms)
    else:
        # set the number of randoms equal to the number of points in sample1
        # this is arbitrarily set, but must remain consistent!
        NR = N1

    # count data pairs
    D1D1, D1D2, D2D2 = pair_counts(
        sample1,
        sample2,
        chord_bins,
        num_threads,
        do_auto,
        do_cross,
        _sample1_is_sample2,
    )
    # count random pairs
    D1R, D2R, RR = random_counts(
        sample1,
        sample2,
        randoms,
        chord_bins,
        num_threads,
        do_RR,
        do_DR,
        _sample1_is_sample2,
    )

    # run results through the estimator and return relavent/user specified results.
    if _sample1_is_sample2:
        xi_11 = _TP_estimator(D1D1, D1R, RR, N1, N1, NR, NR, estimator)
        return xi_11
    else:
        if (do_auto is True) & (do_cross is True):
            xi_11 = _TP_estimator(D1D1, D1R, RR, N1, N1, NR, NR, estimator)
            xi_12 = _TP_estimator_crossx(D1D2, D1R, D2R, RR, N1, N2, NR, NR, estimator)
            xi_22 = _TP_estimator(D2D2, D2R, RR, N2, N2, NR, NR, estimator)
            return xi_11, xi_12, xi_22
        elif do_cross is True:
            xi_12 = _TP_estimator_crossx(D1D2, D1R, D2R, RR, N1, N2, NR, NR, estimator)
            return xi_12
        elif do_auto is True:
            xi_11 = _TP_estimator(D1D1, D1R, RR, N1, N1, NR, NR, estimator)
            xi_22 = _TP_estimator(D2D2, D2R, RR, N2, N2, NR, NR, estimator)
            return xi_11, xi_22



def _angular_tpcf_process_args(
    sample1, theta_bins, sample2, randoms, do_auto, do_cross, estimator, num_threads
):
    """
    Private method to do bounds-checking on the arguments passed to
    `~halotools.mock_observables.angular_tpcf`.
    """

    sample1 = np.atleast_1d(sample1)

    sample2, _sample1_is_sample2, do_cross = process_optional_input_sample2(
        sample1, sample2, do_cross, ndim=2
    )

    if randoms is not None:
        randoms = np.atleast_1d(randoms)

    theta_bins = np.atleast_1d(theta_bins)
    theta_max = np.max(theta_bins)
    try:
        assert theta_bins.ndim == 1
        assert len(theta_bins) > 1
        if len(theta_bins) > 2:
            assert array_is_monotonic(theta_bins, strict=True) == 1
    except AssertionError:
        msg = (
            "\n Input `theta_bins` must be a monotonically increasing 1-D \n"
            "array with at least two entries."
        )
        raise HalotoolsError(msg)

    # check for input parameter consistency
    if theta_max >= 180.0:
        msg = (
            "\n The maximum length over which you search for pairs of points \n"
            "cannot be larger than 180.0 deg. \n"
        )
        raise HalotoolsError(msg)

    if (type(do_auto) is not bool) | (type(do_cross) is not bool):
        msg = "\n `do_auto` and `do_cross` keywords must be of type boolean."
        raise HalotoolsError(msg)

    num_threads = get_num_threads(num_threads)

    verify_tpcf_estimator(estimator)

    return (
        sample1,
        theta_bins,
        sample2,
        randoms,
        do_auto,
        do_cross,
        num_threads,
        _sample1_is_sample2,
    )
Navigation

Source code for halotools.mock_observables.two_point_clustering.angular_tpcf