Source code for halotools.sim_manager.user_supplied_ptcl_catalog

""" Module containing the UserSuppliedPtclCatalog class.
"""
import numpy as np
import os
from warnings import warn
import datetime

from astropy.table import Table

try:
    import h5py
except ImportError:
    warn("Most of the functionality of the sim_manager "
         "sub-package requires h5py to be installed,\n"
         "which can be accomplished either with pip or conda")

from .ptcl_table_cache import PtclTableCache
from .ptcl_table_cache_log_entry import PtclTableCacheLogEntry
from .halo_table_cache_log_entry import get_redshift_string

from ..utils.array_utils import custom_len
from ..custom_exceptions import HalotoolsError

__all__ = ('UserSuppliedPtclCatalog', )


[docs] class UserSuppliedPtclCatalog(object): """ Class used to transform a user-provided particle catalog into the standard form recognized by Halotools. Random downsamplings of dark matter particles are not especially useful catalogs in their own right. So primary purpose of this class is the `add_ptclcat_to_cache` method, which sets you up to use the dark matter particle collection together with the associated halo catalog. See :ref:`working_with_alternative_particle_data` for a tutorial on this class. """ def __init__(self, **kwargs): """ Parameters ------------ **metadata : float or string Keyword arguments storing catalog metadata. The quantities `Lbox` and `particle_mass` are required and must be in Mpc/h and Msun/h units, respectively. `redshift` is also required metadata. See Examples section for further notes. **ptcl_catalog_columns : sequence of arrays Sequence of length-*Nptcls* arrays passed in as keyword arguments. Each key will be the column name attached to the input array. At a minimum, there must be columns ``x``, ``y`` and ``z``. See Examples section for further notes. Examples ---------- Here is an example using dummy data to show how to create a new `UserSuppliedPtclCatalog` and store it in cache for future use with the associated halo catalog. First the setup: >>> redshift = 0.0 >>> Lbox = 250. >>> particle_mass = 1e9 >>> num_ptcls = int(1e4) >>> x = np.random.uniform(0, Lbox, num_ptcls) >>> y = np.random.uniform(0, Lbox, num_ptcls) >>> z = np.random.uniform(0, Lbox, num_ptcls) >>> ptcl_ids = np.arange(0, num_ptcls) >>> vx = np.random.uniform(-100, 100, num_ptcls) >>> vy = np.random.uniform(-100, 100, num_ptcls) >>> vz = np.random.uniform(-100, 100, num_ptcls) Now we simply pass in both the metadata and the particle catalog columns as keyword arguments: >>> ptcl_catalog = UserSuppliedPtclCatalog(redshift=redshift, Lbox=Lbox, particle_mass=particle_mass, x=x, y=y, z=z, vx=vx, vy=vy, vz=vz, ptcl_ids=ptcl_ids) Take note: it is important that the value of the input ``redshift`` matches whatever the redshift is of the associated halo catalog. Your ``redshift`` should be accurate to four decimal places. Now that we have built a Halotools-formatted particle catalog, we can add it to the cache as follows. First choose a relatively permanent location on disk where you will be storing the particle data: >>> my_fname = 'some_fname.hdf5' Next choose the ``simname`` that matches the ``simname`` of the associated halo catalog, for example: >>> my_simname = 'bolplanck' Now choose any version name that will help you keep track of potentially different version of the same catalog of particles. >>> my_version_name = 'any version name' Finally, give a short, plain-language descriptions of how you obtained your collection of particles: >>> my_processing_notes = 'This particle catalog was obtained through the following means: ...' Now we add the particle catalog to cache using the following syntax: >>> ptcl_catalog.add_ptclcat_to_cache(my_fname, my_simname, my_version_name, my_processing_notes) # doctest: +SKIP Your particle catalog has now been cached and is accessible whenever you load the associated halo catalog into memory. For example: >>> from halotools.sim_manager import CachedHaloCatalog >>> halocat = CachedHaloCatalog(simname=my_simname, halo_finder='some halo-finder', version_name='some version-name', redshift=redshift, ptcl_version_name=my_version_name) # doctest: +SKIP Note the arguments passed to the `~halotools.sim_manager.CachedHaloCatalog` class. The ``version_name`` here refers to the *halos*, not the particles. When loading the `~halotools.sim_manager.CachedHaloCatalog`, you specify the version name of the particles with the ``ptcl_version_name`` keyword argument. The ``ptcl_version_name`` need not agree with the ``version_name`` of the associated halos. This allows halo and particle catalogs to evolve independently over time. In fact, for cases where you have supplied your own particles, it is *strongly* recommended that you choose a version name for your particles that differs from the version name that Halotools uses for its catalogs. This will help avoid future confusion over the where the cached particle catalog came from. The particle catalog itself is stored in the ``ptcl_table`` attribute, with columns accessed as follows: >>> array_of_x_positions = halocat.ptcl_table['x'] # doctest: +SKIP If you do not wish to store your particle catalog in cache, see the :ref:`using_user_supplied_ptcl_catalog_without_the_cache` section of the :ref:`working_with_alternative_particle_data` tutorial. """ ptcl_table_dict, metadata_dict = self._parse_constructor_kwargs(**kwargs) self.ptcl_table = Table(ptcl_table_dict) self._test_metadata_dict(**metadata_dict) # make Lbox a 3-vector _Lbox = metadata_dict.pop('Lbox') metadata_dict['Lbox'] = np.empty(3) metadata_dict['Lbox'][:] = _Lbox for key, value in metadata_dict.items(): setattr(self, key, value) def _parse_constructor_kwargs(self, **kwargs): """ """ try: x = kwargs['x'] assert type(x) is np.ndarray y = kwargs['y'] assert type(y) is np.ndarray z = kwargs['z'] assert type(z) is np.ndarray Nptcls = custom_len(x) assert Nptcls >= 1e4 assert Nptcls == len(y) assert Nptcls == len(z) except KeyError: msg = ("\nThe UserSuppliedPtclCatalog requires ``x``, ``y`` and " "``z`` keyword arguments,\n each of which must store an " "ndarray of the same length Nptcls >= 1e4.\n") raise HalotoolsError(msg) ptcl_table_dict = ( {key: kwargs[key] for key in kwargs if (type(kwargs[key]) is np.ndarray) and (custom_len(kwargs[key]) == Nptcls)}) metadata_dict = ( {key: kwargs[key] for key in kwargs if key not in ptcl_table_dict}) return ptcl_table_dict, metadata_dict def _test_metadata_dict(self, **metadata_dict): try: assert 'Lbox' in metadata_dict assert custom_len(metadata_dict['Lbox']) in [1,3] except AssertionError: msg = ("\nThe UserSuppliedPtclCatalog requires keyword argument " "``Lbox``, storing either a scalar or 3-vector.\n") raise HalotoolsError(msg) try: assert 'particle_mass' in metadata_dict assert custom_len(metadata_dict['particle_mass']) == 1 assert 'redshift' in metadata_dict except AssertionError: msg = ("\nThe UserSuppliedPtclCatalog requires keyword arguments " "``particle_mass`` and ``redshift``\n" "storing scalars that will be interpreted as metadata " "about the particle catalog.\n") raise HalotoolsError(msg) Lbox = np.empty(3) Lbox[:] = metadata_dict['Lbox'] assert (Lbox > 0).all(), "``Lbox`` must be positive" try: x, y, z = ( self.ptcl_table['x'], self.ptcl_table['x'], self.ptcl_table['z']) assert np.all(x >= 0) assert np.all(x <= Lbox[0]) assert np.all(y >= 0) assert np.all(y <= Lbox[1]) assert np.all(z >= 0) assert np.all(z <= Lbox[2]) except AssertionError: msg = ("The ``x``, ``y`` and ``z`` columns must only store " "arrays\n that are bound by 0 and the input ``Lbox``. \n") raise HalotoolsError(msg) try: redshift = float(metadata_dict['redshift']) except: msg = ("\nThe ``redshift`` metadata must be a float.\n") raise HalotoolsError(msg)
[docs] def add_ptclcat_to_cache(self, fname, simname, version_name, processing_notes, overwrite=False): """ Parameters ------------ fname : string Absolute path of the file to be stored in cache. Must conclude with an `.hdf5` extension. simname : string Nickname of the simulation used as a shorthand way to keep track of the catalogs in your cache. version_name : string Nickname of the version of the particle catalog. The ``version_name`` is used as a bookkeeping tool in the cache log. As described in the `~halotools.sim_manager.UserSuppliedPtclCatalog` docstring, the version name selected here need not match the version name of the associated halo catalog. processing_notes : string String used to provide supplementary notes that will be attached to the hdf5 file storing your particle data. overwrite : bool, optional If the chosen ``fname`` already exists, then you must set ``overwrite`` to True in order to write the file to disk. Default is False. """ ############################################################ # Perform some consistency checks in the fname if (os.path.isfile(fname)) & (overwrite is False): msg = ("\nYou attempted to store your particle catalog " "in the following location: \n\n" + str(fname) + "\n\nThis path points to an existing file. \n" "Either choose a different fname or set ``overwrite`` to True.\n") raise HalotoolsError(msg) try: dirname = os.path.dirname(fname) assert os.path.exists(dirname) except: msg = ("\nThe directory you are trying to store the file does not exist. \n") raise HalotoolsError(msg) if fname[-5:] != '.hdf5': msg = ("\nThe fname must end with an ``.hdf5`` extension.\n") raise HalotoolsError(msg) ############################################################ # Perform consistency checks on the remaining log entry attributes try: _ = str(simname) _ = str(version_name) _ = str(processing_notes) except: msg = ("\nThe input ``simname``, ``version_name`` " "and ``processing_notes``\nmust all be strings.") raise HalotoolsError(msg) ############################################################ # Now write the file to disk and add the appropriate metadata self.ptcl_table.write(fname, path='data', overwrite=overwrite) f = h5py.File(fname, 'a') redshift_string = get_redshift_string(self.redshift) f.attrs.create('simname', np.string_(simname)) f.attrs.create('version_name', np.string_(version_name)) f.attrs.create('redshift', np.string_(redshift_string)) f.attrs.create('fname', np.string_(fname)) f.attrs.create('Lbox', self.Lbox) f.attrs.create('particle_mass', self.particle_mass) time_right_now = datetime.datetime.now().strftime( '%Y-%m-%d %H:%M:%S') f.attrs.create('time_catalog_was_originally_cached', np.string_(time_right_now)) f.attrs.create('processing_notes', np.string_(processing_notes)) f.close() ############################################################ # Now that the file is on disk, add it to the cache cache = PtclTableCache() log_entry = PtclTableCacheLogEntry( simname=simname, version_name=version_name, redshift=self.redshift, fname=fname) cache.add_entry_to_cache_log(log_entry, update_ascii=True) self.log_entry = log_entry