Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 263 additions & 0 deletions development/find_radiation_threshold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
"""Determine a safe radiation threshold for the growth-factor selector.

This utility compares `Tinker08` halo mass functions computed with
`Eisenstein97GrowthFactor` and `ODEGrowthFactor` over a redshift and mass grid, then
reports the largest radiation-density threshold for which `dndlnm` stays within a
chosen relative tolerance for all masses below a configurable upper limit.
"""

from __future__ import annotations

import argparse
import sys
from dataclasses import dataclass

import numpy as np
from astropy.cosmology import FlatLambdaCDM

from hmf import MassFunction


@dataclass(frozen=True)
class ThresholdConfig:
"""Configuration for the radiation-threshold scan.

Parameters
----------
z_min
Minimum redshift included in the scan.
z_max
Maximum redshift included in the scan.
dz
Redshift step size.
mass_limit
Maximum halo mass, in `Msun / h`, included in the error check.
mmin
Minimum log10 halo mass passed to `MassFunction`.
mmax
Maximum log10 halo mass passed to `MassFunction`.
dlog10m
Log10 halo-mass spacing passed to `MassFunction`.
tolerance
Maximum allowed relative difference in `dndlnm`.
hmf_model
Halo mass function model to evaluate.
H0
Hubble constant for the comparison cosmology.
Om0
Matter density parameter at `z=0`.
Ob0
Baryon density parameter at `z=0`.
Tcmb0
CMB temperature for the comparison cosmology.
sigma_8
Present-day amplitude of matter fluctuations.
n
Primordial scalar spectral index.
"""

z_min: float = 0.0
z_max: float = 12.0
dz: float = 0.1
mass_limit: float = 1.0e14
mmin: float = 10.0
mmax: float = 14.2
dlog10m: float = 0.02
tolerance: float = 0.01
hmf_model: str = "Tinker08"
H0: float = 67.74
Om0: float = 0.3089
Ob0: float = 0.0486
Tcmb0: float = 2.7255
sigma_8: float = 0.8159
n: float = 0.9667

@property
def cosmo(self) -> FlatLambdaCDM:
"""Comparison cosmology used for the scan."""
return FlatLambdaCDM(
H0=self.H0,
Om0=self.Om0,
Ob0=self.Ob0,
Tcmb0=self.Tcmb0,
)

@property
def redshifts(self) -> np.ndarray:
"""Redshift grid used for the scan."""
return np.arange(self.z_min, self.z_max + self.dz / 2, self.dz)


def radiation_density(cosmo: FlatLambdaCDM, z: float) -> float:
"""Compute the fractional radiation density at redshift `z`.

Parameters
----------
cosmo
Cosmology used for the comparison.
z
Redshift at which to evaluate the radiation fraction.

Returns
-------
float
Radiation density fraction, matching the selector logic used by
`GrowthFactor.radiation_density`.
"""
return float((cosmo.Ogamma0 + cosmo.Onu0) * (1 + z) ** 4 * cosmo.inv_efunc(z) ** 2)


def build_mass_function(z: float, growth_model: str, config: ThresholdConfig) -> MassFunction:
"""Construct a comparison `MassFunction` instance.

Parameters
----------
z
Redshift of the calculation.
growth_model
Growth-factor model name to pass to `MassFunction`.
config
Configuration for the threshold scan.

Returns
-------
MassFunction
Configured mass function instance.
"""
return MassFunction(
z=z,
hmf_model=config.hmf_model,
growth_model=growth_model,
transfer_model="EH",
mdef_model="SOMean",
mdef_params={"overdensity": 200},
Mmin=config.mmin,
Mmax=config.mmax,
dlog10m=config.dlog10m,
cosmo_params={
"H0": config.H0,
"Om0": config.Om0,
"Ob0": config.Ob0,
"Tcmb0": config.Tcmb0,
},
sigma_8=config.sigma_8,
n=config.n,
)


def max_relative_dndlnm_error(z: float, config: ThresholdConfig) -> float:
"""Return the maximum relative `dndlnm` error at one redshift.

Parameters
----------
z
Redshift at which to compare the growth models.
config
Configuration for the threshold scan.

Returns
-------
float
Maximum relative difference between the Eisenstein and ODE `dndlnm`
predictions for masses below `config.mass_limit`.
"""
eisenstein = build_mass_function(z, "Eisenstein97GrowthFactor", config)
ode = build_mass_function(z, "ODEGrowthFactor", config)

mask = eisenstein.m < config.mass_limit
rel = np.abs(eisenstein.dndlnm[mask] - ode.dndlnm[mask]) / ode.dndlnm[mask]
return float(np.max(rel))


def find_safe_threshold(config: ThresholdConfig) -> list[tuple[float, float, float]]:
"""Scan the grid and collect redshift, radiation density, and HMF error.

Parameters
----------
config
Configuration for the threshold scan.

Returns
-------
list of tuple
Tuples of `(z, radiation_density, max_relative_error)`.
"""
return [
(
float(z),
radiation_density(config.cosmo, float(z)),
max_relative_dndlnm_error(float(z), config),
)
for z in config.redshifts
]


def write_line(text: str = "") -> None:
"""Write one line to standard output."""
sys.stdout.write(f"{text}\n")


def build_parser() -> argparse.ArgumentParser:
"""Create the command-line parser for the utility."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--z-min", type=float, default=ThresholdConfig.z_min)
parser.add_argument("--z-max", type=float, default=ThresholdConfig.z_max)
parser.add_argument("--dz", type=float, default=ThresholdConfig.dz)
parser.add_argument("--mass-limit", type=float, default=ThresholdConfig.mass_limit)
parser.add_argument("--mmin", type=float, default=ThresholdConfig.mmin)
parser.add_argument("--mmax", type=float, default=ThresholdConfig.mmax)
parser.add_argument("--dlog10m", type=float, default=ThresholdConfig.dlog10m)
parser.add_argument("--tolerance", type=float, default=ThresholdConfig.tolerance)
parser.add_argument("--hmf-model", type=str, default=ThresholdConfig.hmf_model)
return parser


def main() -> int:
"""Run the threshold scan and print a compact report."""
args = build_parser().parse_args()
config = ThresholdConfig(
z_min=args.z_min,
z_max=args.z_max,
dz=args.dz,
mass_limit=args.mass_limit,
mmin=args.mmin,
mmax=args.mmax,
dlog10m=args.dlog10m,
tolerance=args.tolerance,
hmf_model=args.hmf_model,
)

rows = find_safe_threshold(config)
safe_rows = [row for row in rows if row[2] <= config.tolerance]
failing_rows = [row for row in rows if row[2] > config.tolerance]

write_line("z radiation_density max_rel_dndlnm_error")
for z, rad, err in rows:
write_line(f"{z:5.2f} {rad:17.10f} {err:20.10f}")

if not safe_rows:
write_line("\nNo safe redshifts found on the requested grid.")
return 1

z_safe, threshold, err_safe = safe_rows[-1]
write_line(
"\nLargest safe threshold on scanned grid: "
f"{threshold:.10f} at z={z_safe:.2f} with max_rel_error={err_safe:.6f}"
)

if failing_rows:
z_fail, rad_fail, err_fail = failing_rows[0]
write_line(
"First failing point: "
f"z={z_fail:.2f}, radiation_density={rad_fail:.10f}, max_rel_error={err_fail:.6f}"
)
else:
write_line(
"No failing points found on the scanned grid; increase --z-max to probe further."
)

return 0


if __name__ == "__main__":
raise SystemExit(main())
124 changes: 124 additions & 0 deletions docs/technical/colossus_comparison.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
Colossus Comparison Notes
=========================

This note documents the known causes of differences between ``hmf`` and
`Colossus <https://bdiemer.bitbucket.io/colossus/>`_ for the native
``Tinker08`` halo mass function.
In versions pre-3.6.1, a major difference was the growth factor computation, which was
definitively less accurate in ``hmf`` than in Colossus. However, after
fixing the growth factor implementation in
`this PR <https://github.com/halomod/hmf/pull/270>`_ for v3.6.0, and then tightening
the growth-selector threshold in v3.6.1, some small residual differences remain,
particularly at high redshift.

The goal of this note is not to argue that either code is definitively "more
correct". Instead, it records the main implementation choices that explain the
observed residual differences so that users and developers understand where
agreement is expected and where small systematic offsets are normal.

Setup of the comparison
-----------------------

The comparisons discussed here were made with:

- the native ``200m`` form of ``Tinker08``,
- matched flat cosmologies with ``H0 = 67.74``, ``Om0 = 0.3089``,
``Ob0 = 0.0486``, ``sigma8 = 0.8159``, and ``ns = 0.9667``,
- the ``EH`` transfer model in ``hmf``, and
- direct comparisons of ``dndlnm`` at representative masses
:math:`10^{11}`, :math:`10^{12}`, and :math:`10^{13}\,M_\odot/h`.

With this setup, the mismatch is small at low redshift and grows
toward high redshift, particularly in the rare-halo tail.

What is *not* driving the mismatch
----------------------------------

Several obvious suspects were checked and found not to be the dominant cause:

- **Mass-definition conversion:** this comparison uses native ``200m``
``Tinker08`` predictions, so it does not rely on the mass-definition
conversion path.
- **Transfer-function normalization at** :math:`z=0`: ``hmf`` and Colossus
agree on :math:`\sigma(M,0)` at about the :math:`10^{-4}` level for the
matched setup.
- **Slope term:** the logarithmic slope entering ``dndlnm``,
:math:`d \ln \sigma / d \ln R`, agrees at the sub-:math:`0.1\%` level.

The main causes of the residual difference
------------------------------------------

Two effects dominate the remaining mismatch.

High-redshift growth implementation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

After the selector update, ``hmf`` uses the full ODE growth solution whenever
the radiation fraction exceeds the calibrated threshold (essentially for z>1.5).
Colossus uses a different hybrid approach for LCDM cosmologies:

- an analytic matter-radiation approximation at high redshift, and
- an integral solution at low redshift,

with a transition regime between them.

These are both reasonable algorithmic choices, but they do not produce exactly
the same high-redshift growth history. In the matched comparison used here,
``hmf`` ends up with a slightly larger :math:`\sigma(M, z)` than Colossus at
high redshift, typically by about :math:`0.26`--:math:`0.34\%` over
``z = 6``--``10`` for the masses tested.

That difference is tiny in :math:`\sigma` itself, but it is evaluated in the
exponential tail of the halo mass function, where very small shifts in
:math:`\sigma` can produce multi-percent shifts in abundance.

Tinker08 coefficient precision
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The two codes also do not use numerically identical ``Tinker08`` coefficients.

``hmf`` stores the more precise coefficient values, for example:

- ``A_200 = 0.1858659``
- ``a_200 = 1.466904``
- ``b_200 = 2.571104``
- ``c_200 = 1.193958``

Colossus uses the rounded table values:

- ``A_200 = 0.186``
- ``a_200 = 1.47``
- ``b_200 = 2.57``
- ``c_200 = 1.19``

At fixed :math:`\sigma`, this coefficient rounding changes :math:`f(\sigma)` by
only a little at low redshift, but by several percent in the high-redshift
tail. In the matched tests performed here, the coefficient choice alone
accounts for approximately:

- :math:`2`--:math:`6\%` at ``z = 6``,
- :math:`3`--:math:`10\%` at ``z = 8``, and
- :math:`4`--:math:`15\%` at ``z = 10``,

depending on mass.

How the effects combine
-----------------------

The two dominant effects push in opposite directions:

- the slightly larger high-redshift :math:`\sigma(M, z)` in ``hmf`` tends to
increase the abundance relative to Colossus, while
- the more precise ``Tinker08`` coefficients in ``hmf`` tend to decrease the
abundance relative to Colossus' rounded table.

Because these effects partially cancel, the final mismatch is significantly
smaller than either ingredient by itself. For the matched setup used in the
external regression test, the resulting ``hmf`` versus Colossus difference is
typically of order:

- ``z = 6``: :math:`\sim 0.1\%` to :math:`2.7\%`,
- ``z = 8``: :math:`\sim 1\%` to :math:`7\%`,
- ``z = 10``: :math:`\sim 2\%` to :math:`14\%`,

over the range :math:`10^{11}`--:math:`10^{13}\,M_\odot/h`.
1 change: 1 addition & 0 deletions docs/technical/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Technical Documentation and Derivations
:maxdepth: 1

growth_factors
colossus_comparison
Loading
Loading