""" Module containing the UserSuppliedPtclCatalog class.
"""
import numpy as np
import os
from warnings import warn
import datetime
from astropy.table import Table
try:
import h5py
except ImportError:
warn("Most of the functionality of the sim_manager "
"sub-package requires h5py to be installed,\n"
"which can be accomplished either with pip or conda")
from .ptcl_table_cache import PtclTableCache
from .ptcl_table_cache_log_entry import PtclTableCacheLogEntry
from .halo_table_cache_log_entry import get_redshift_string
from ..utils.array_utils import custom_len
from ..custom_exceptions import HalotoolsError
__all__ = ('UserSuppliedPtclCatalog', )
class UserSuppliedPtclCatalog(object):
""" Class used to transform a user-provided particle catalog
into the standard form recognized by Halotools.
Random downsamplings of dark matter particles are not especially useful
    catalogs in their own right. The primary purpose of this class
    is therefore the `add_ptclcat_to_cache` method,
which sets you up to use the dark matter particle collection
together with the associated halo catalog.
See :ref:`working_with_alternative_particle_data` for a tutorial on this class.
"""
def __init__(self, **kwargs):
"""
Parameters
------------
**metadata : float or string
Keyword arguments storing catalog metadata.
The quantities `Lbox` and `particle_mass`
are required and must be in Mpc/h and Msun/h units, respectively.
`redshift` is also required metadata.
See Examples section for further notes.
**ptcl_catalog_columns : sequence of arrays
Sequence of length-*Nptcls* arrays passed in as keyword arguments.
Each key will be the column name attached to the input array.
At a minimum, there must be columns ``x``, ``y`` and ``z``.
See Examples section for further notes.
Examples
----------
Here is an example using dummy data to show how to create a new `UserSuppliedPtclCatalog`
and store it in cache for future use with the associated halo catalog.
First the setup:
    >>> import numpy as np
    >>> redshift = 0.0
>>> Lbox = 250.
>>> particle_mass = 1e9
>>> num_ptcls = int(1e4)
>>> x = np.random.uniform(0, Lbox, num_ptcls)
>>> y = np.random.uniform(0, Lbox, num_ptcls)
>>> z = np.random.uniform(0, Lbox, num_ptcls)
>>> ptcl_ids = np.arange(0, num_ptcls)
>>> vx = np.random.uniform(-100, 100, num_ptcls)
>>> vy = np.random.uniform(-100, 100, num_ptcls)
>>> vz = np.random.uniform(-100, 100, num_ptcls)
Now we simply pass in both the metadata and the particle catalog columns as keyword arguments:
>>> ptcl_catalog = UserSuppliedPtclCatalog(redshift=redshift, Lbox=Lbox, particle_mass=particle_mass, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz, ptcl_ids=ptcl_ids)
Take note: it is important that the value of the input ``redshift`` matches
whatever the redshift is of the associated halo catalog. Your ``redshift``
should be accurate to four decimal places.
Now that we have built a Halotools-formatted particle catalog, we can add it to the cache as follows.
First choose a relatively permanent location on disk where you will be storing the particle data:
>>> my_fname = 'some_fname.hdf5'
Next choose the ``simname`` that matches the ``simname`` of the associated halo catalog, for example:
>>> my_simname = 'bolplanck'
Now choose any version name that will help you keep track of
    potentially different versions of the same catalog of particles.
>>> my_version_name = 'any version name'
    Finally, give a short, plain-language description of how
you obtained your collection of particles:
>>> my_processing_notes = 'This particle catalog was obtained through the following means: ...'
Now we add the particle catalog to cache using the following syntax:
>>> ptcl_catalog.add_ptclcat_to_cache(my_fname, my_simname, my_version_name, my_processing_notes) # doctest: +SKIP
Your particle catalog has now been cached and is accessible whenever
you load the associated halo catalog into memory. For example:
>>> from halotools.sim_manager import CachedHaloCatalog
>>> halocat = CachedHaloCatalog(simname=my_simname, halo_finder='some halo-finder', version_name='some version-name', redshift=redshift, ptcl_version_name=my_version_name) # doctest: +SKIP
Note the arguments passed to the `~halotools.sim_manager.CachedHaloCatalog` class.
The ``version_name`` here refers to the *halos*, not the particles.
When loading the `~halotools.sim_manager.CachedHaloCatalog`,
you specify the version name of the particles
with the ``ptcl_version_name`` keyword argument.
The ``ptcl_version_name`` need not agree with the ``version_name`` of the associated halos.
This allows halo and particle catalogs to evolve independently over time.
In fact, for cases where you have supplied your own particles, it is *strongly* recommended
that you choose a version name for your particles that differs from the version name
    that Halotools uses for its catalogs. This will help avoid future confusion over
    where the cached particle catalog came from.
The particle catalog itself is stored in the ``ptcl_table`` attribute,
with columns accessed as follows:
>>> array_of_x_positions = halocat.ptcl_table['x'] # doctest: +SKIP
If you do not wish to store your particle catalog in cache,
see the :ref:`using_user_supplied_ptcl_catalog_without_the_cache` section
of the :ref:`working_with_alternative_particle_data` tutorial.
"""
ptcl_table_dict, metadata_dict = self._parse_constructor_kwargs(**kwargs)
self.ptcl_table = Table(ptcl_table_dict)
self._test_metadata_dict(**metadata_dict)
# make Lbox a 3-vector
_Lbox = metadata_dict.pop('Lbox')
metadata_dict['Lbox'] = np.empty(3)
metadata_dict['Lbox'][:] = _Lbox
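        # e.g., a scalar input Lbox = 250. is broadcast to
        # np.array([250., 250., 250.]); a 3-vector input is stored as-is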
for key, value in metadata_dict.items():
setattr(self, key, value)
def _parse_constructor_kwargs(self, **kwargs):
"""
"""
try:
x = kwargs['x']
assert type(x) is np.ndarray
y = kwargs['y']
assert type(y) is np.ndarray
z = kwargs['z']
assert type(z) is np.ndarray
Nptcls = custom_len(x)
assert Nptcls >= 1e4
assert Nptcls == len(y)
assert Nptcls == len(z)
        except (KeyError, AssertionError):
msg = ("\nThe UserSuppliedPtclCatalog requires ``x``, ``y`` and "
"``z`` keyword arguments,\n each of which must store an "
"ndarray of the same length Nptcls >= 1e4.\n")
raise HalotoolsError(msg)
ptcl_table_dict = (
{key: kwargs[key] for key in kwargs
if (type(kwargs[key]) is np.ndarray) and
(custom_len(kwargs[key]) == Nptcls)})
metadata_dict = (
{key: kwargs[key] for key in kwargs if key not in ptcl_table_dict})
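        # Illustration with hypothetical inputs: for a call such as
        # UserSuppliedPtclCatalog(x=x, y=y, z=z, Lbox=250., particle_mass=1e9,
        # redshift=0.), the three length-Nptcls ndarrays become table columns,
        # while the three scalars fall through to metadata_dict, because only
        # ndarrays whose length equals Nptcls qualify as columns.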
return ptcl_table_dict, metadata_dict
def _test_metadata_dict(self, **metadata_dict):
try:
assert 'Lbox' in metadata_dict
            assert custom_len(metadata_dict['Lbox']) in [1, 3]
except AssertionError:
msg = ("\nThe UserSuppliedPtclCatalog requires keyword argument "
"``Lbox``, storing either a scalar or 3-vector.\n")
raise HalotoolsError(msg)
try:
assert 'particle_mass' in metadata_dict
assert custom_len(metadata_dict['particle_mass']) == 1
assert 'redshift' in metadata_dict
except AssertionError:
msg = ("\nThe UserSuppliedPtclCatalog requires keyword arguments "
"``particle_mass`` and ``redshift``\n"
"storing scalars that will be interpreted as metadata "
"about the particle catalog.\n")
raise HalotoolsError(msg)
Lbox = np.empty(3)
Lbox[:] = metadata_dict['Lbox']
assert (Lbox > 0).all(), "``Lbox`` must be positive"
try:
x, y, z = (
self.ptcl_table['x'],
                self.ptcl_table['y'],
self.ptcl_table['z'])
assert np.all(x >= 0)
assert np.all(x <= Lbox[0])
assert np.all(y >= 0)
assert np.all(y <= Lbox[1])
assert np.all(z >= 0)
assert np.all(z <= Lbox[2])
except AssertionError:
msg = ("The ``x``, ``y`` and ``z`` columns must only store "
"arrays\n that are bound by 0 and the input ``Lbox``. \n")
raise HalotoolsError(msg)
        try:
            float(metadata_dict['redshift'])
        except (TypeError, ValueError):
            msg = ("\nThe ``redshift`` metadata must be a float.\n")
            raise HalotoolsError(msg)
def add_ptclcat_to_cache(self, fname, simname, version_name,
processing_notes, overwrite=False):
"""
Parameters
------------
fname : string
Absolute path of the file to be stored in cache.
            Must conclude with an ``.hdf5`` extension.
simname : string
Nickname of the simulation used as a shorthand way to keep track
of the catalogs in your cache.
version_name : string
Nickname of the version of the particle catalog.
The ``version_name`` is used as a bookkeeping tool in the cache log.
As described in the `~halotools.sim_manager.UserSuppliedPtclCatalog` docstring,
the version name selected here need not match the version name
of the associated halo catalog.
processing_notes : string
String used to provide supplementary notes that will be attached to
the hdf5 file storing your particle data.
overwrite : bool, optional
If the chosen ``fname`` already exists, then you must set ``overwrite``
to True in order to write the file to disk. Default is False.
"""
############################################################
        # Perform some consistency checks on the fname
        if os.path.isfile(fname) and not overwrite:
msg = ("\nYou attempted to store your particle catalog "
"in the following location: \n\n" + str(fname) +
"\n\nThis path points to an existing file. \n"
"Either choose a different fname or set ``overwrite`` to True.\n")
raise HalotoolsError(msg)
try:
dirname = os.path.dirname(fname)
assert os.path.exists(dirname)
        except (TypeError, AssertionError):
            msg = ("\nThe directory in which you are trying "
                "to store the file does not exist.\n")
raise HalotoolsError(msg)
        if not fname.endswith('.hdf5'):
msg = ("\nThe fname must end with an ``.hdf5`` extension.\n")
raise HalotoolsError(msg)
############################################################
# Perform consistency checks on the remaining log entry attributes
try:
_ = str(simname)
_ = str(version_name)
_ = str(processing_notes)
        except Exception:
msg = ("\nThe input ``simname``, ``version_name`` "
"and ``processing_notes``\nmust all be strings.")
raise HalotoolsError(msg)
############################################################
# Now write the file to disk and add the appropriate metadata
self.ptcl_table.write(fname, path='data', overwrite=overwrite)
f = h5py.File(fname, 'a')
redshift_string = get_redshift_string(self.redshift)
f.attrs.create('simname', np.string_(simname))
f.attrs.create('version_name', np.string_(version_name))
f.attrs.create('redshift', np.string_(redshift_string))
f.attrs.create('fname', np.string_(fname))
f.attrs.create('Lbox', self.Lbox)
f.attrs.create('particle_mass', self.particle_mass)
time_right_now = datetime.datetime.now().strftime(
'%Y-%m-%d %H:%M:%S')
f.attrs.create('time_catalog_was_originally_cached', np.string_(time_right_now))
f.attrs.create('processing_notes', np.string_(processing_notes))
f.close()
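        # Sketch of how the metadata written above could later be inspected
        # (assumes h5py is installed):
        #     with h5py.File(fname, 'r') as fileobj:
        #         print(fileobj.attrs['simname'], fileobj.attrs['redshift'])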
############################################################
# Now that the file is on disk, add it to the cache
cache = PtclTableCache()
log_entry = PtclTableCacheLogEntry(
simname=simname, version_name=version_name,
redshift=self.redshift, fname=fname)
cache.add_entry_to_cache_log(log_entry, update_ascii=True)
self.log_entry = log_entry
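
# A minimal end-to-end sketch of the intended workflow. The fname,
# version_name and processing notes below are hypothetical placeholders:
#
#     import numpy as np
#     from halotools.sim_manager import UserSuppliedPtclCatalog
#
#     num_ptcls = int(1e4)
#     pos = {ax: np.random.uniform(0, 250., num_ptcls) for ax in ('x', 'y', 'z')}
#     ptclcat = UserSuppliedPtclCatalog(
#         Lbox=250., particle_mass=1e9, redshift=0., **pos)
#     ptclcat.add_ptclcat_to_cache(
#         '/some/permanent/path/my_ptcls.hdf5', 'bolplanck',
#         'my_ptcl_version', 'random uniform dummy particles')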