casact · EKtheSage · May 16, 2026 · May 16, 2026 · May 16, 2026 · May 18, 2026
@@ -15,6 +15,12 @@ class BarnettZehnwirth(TweedieGLM):
     """ This estimator enables modeling from the Probabilistic Trend Family as
     described by Barnett and Zehnwirth.
 
+    The model is fit on log-incremental losses and produces multiplicative
+    ``ldf_`` patterns for use with IBNR estimators. Specify the regression
+    structure either with a patsy ``formula`` or with PTF period groupings
+    (``alpha``, ``gamma``, ``iota``) that define origin, trend, and
+    final-period cohorts.
+
     .. versionadded:: 0.8.2
 
     Parameters
@@ -33,6 +39,50 @@ class BarnettZehnwirth(TweedieGLM):
     gamma: list of int
     iota: list of int
 
+    Examples
+    --------
+    When many accident years are available but you want a smaller number of
+    origin cohorts, specify ``alpha``, ``gamma``, and ``iota`` instead of a
+    separate factor for every year. The fitted design has fewer parameters than
+    a fully saturated origin-by-development formula on the same triangle.
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        tri = cl.load_sample("abc")
+        m_ptf = cl.BarnettZehnwirth(
+            alpha=[0, 5], gamma=[0, 2, 5], iota=[0, 7, 11]
+        ).fit(tri)
+        m_full = cl.BarnettZehnwirth(
+            formula="C(origin)+C(development)"
+        ).fit(tri)
+        print(len(m_ptf.coef_.values.flatten()))
+        print(len(m_full.coef_.values.flatten()))
+
+    .. testoutput::
+
+        6
+        21
+
+    Use a patsy ``formula`` when the reserving structure needs explicit terms
+    (for example separate origin and development factors) rather than the PTF
+    cohort shorthand.
+
+    .. testcode::
+
+        import numpy as np
+
+        tri = cl.load_sample("abc")
+        m = cl.BarnettZehnwirth(formula="C(origin)+C(development)").fit(tri)
+        print(np.round(m.ldf_.values[0, 0, :4, 0], 4))
+
+    .. testoutput::
+
+        [2.2854 2.2854 2.2854 2.2854]
+
     """
 
     def __init__(self, drop=None,drop_valuation=None,formula=None, response=None, alpha=None, gamma=None, iota=None):

@@ -11,12 +11,21 @@
 
 
 class TweedieGLM(DevelopmentBase):
-    """ This estimator creates development patterns with a GLM using a Tweedie distribution.
+    """ GLM reserving with scikit-learn's Tweedie distribution.
 
-    The Tweedie family includes several of the more popular distributions including
-    the normal, ODP poisson, and gamma distributions.  This class is a special case
-    of `DevleopmentML`.  It restricts to just GLM using a TweedieRegressor and
-    provides an R-like formulation of the design matrix.
+    Implements the GLM reserving structure of Taylor and McGuire. The Tweedie
+    family covers normal, ODP Poisson, gamma, and related targets via ``power``
+    and ``link``. Covariates from any triangle axis can enter through a patsy
+    ``design_matrix`` while staying close to traditional chainladder methods when
+    origin and development are coded categorically.
+
+    Triangles are converted to long-format tables internally (as with
+    ``Triangle.to_frame(keepdims=True)``); origin periods are restated as years
+    from the earliest origin for sklearn compatibility, and the response is
+    converted to an incremental basis before fitting. This class is a special
+    case of :class:`~chainladder.DevelopmentML` that uses only
+    :class:`~sklearn.linear_model.TweedieRegressor` behind a
+    :class:`~chainladder.utils.utility_functions.PatsyFormula` step.
 
     .. versionadded:: 0.8.1
 
@@ -29,7 +38,7 @@ class TweedieGLM(DevelopmentBase):
     design_matrix: formula-like
         A patsy formula describing the independent variables, X of the GLM
     response:  str
-        Column name for the reponse variable of the GLM.  If ommitted, then the
+        Column name for the response variable of the GLM. If omitted, then the
         first column of the Triangle will be used.
     power: float, default=1
             The power determines the underlying target distribution according
@@ -76,6 +85,87 @@ class TweedieGLM(DevelopmentBase):
     ----------
     model_: sklearn.Pipeline
         A scikit-learn Pipeline of the GLM
+
+    Examples
+    --------
+    Volume-weighted chainladder development can be replicated with a
+    Poisson-log GLM on incremental paid losses: categorical origin and
+    development in ``design_matrix``, ``power=1``, and ``link='log'``. The
+    resulting ``ldf_`` matches :class:`~chainladder.Development` closely on
+    ``genins``.
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        import numpy as np
+
+        tri = cl.load_sample("genins")
+        odp = cl.TweedieGLM(
+            design_matrix="C(development) + C(origin)",
+            power=1,
+            link="log",
+        ).fit(tri)
+        trad = cl.Development().fit(tri)
+        print(round(float(odp.ldf_.values[0, 0, 0, 0]), 4))
+        print(round(float(trad.ldf_.values[0, 0, 0, 0]), 4))
+        print(np.round(odp.ldf_.values[0, 0, :4, 0], 4))
+
+    .. testoutput::
+
+        3.491
+        3.4906
+        [3.491 3.491 3.491 3.491]
+
+    Patsy R-style formulas set ``design_matrix``; continuous ``development``
+    and ``origin`` terms yield a small coefficient table via ``coef_``.
+
+    .. testcode::
+
+        tri = cl.load_sample("genins")
+        glm = cl.TweedieGLM(design_matrix="development + origin").fit(tri)
+        print(len(glm.coef_))
+        print(round(float(glm.coef_.iloc[0, 0]), 6))
+        print(round(float(glm.coef_.iloc[1, 0]), 6))
+
+    .. testoutput::
+
+        3
+        13.516322
+        -0.006251
+
+    On multi-LOB triangles, interaction terms can keep the model parsimonious
+    (10 coefficients here versus 18+ in a full categorical chainladder). The
+    percent difference in ``cdf_`` versus :class:`~chainladder.Development`
+    stays within about 1% at each ultimate lag:
+
+    .. testcode::
+
+        import numpy as np
+
+        clrd = cl.load_sample("clrd")["CumPaidLoss"].groupby("LOB").sum()
+        clrd = clrd[clrd["LOB"].isin(["ppauto", "comauto"])]
+        dev = cl.TweedieGLM(
+            design_matrix=(
+                "LOB+LOB:C(np.minimum(development, 36))"
+                "+LOB:development+LOB:origin"
+            ),
+            max_iter=1000,
+        ).fit(clrd)
+        trad = cl.Development().fit(clrd)
+        pct = ((dev.cdf_.iloc[..., 0, :] / trad.cdf_) - 1).to_frame().round(3)
+        print(len(dev.coef_))
+        print(np.round(pct.loc["comauto"].values, 3))
+        print(np.round(pct.loc["ppauto"].values, 3))
+
+    .. testoutput::
+
+        10
+        [ 0.002  0.003 -0.01   0.003  0.011  0.008  0.005 -0.    -0.002]
+        [ 0.006  0.003 -0.     0.001  0.002  0.001  0.001  0.001  0.001]
+
     """
 
     def __init__(self, design_matrix='C(development) + C(origin)',

@@ -11,15 +11,26 @@
 class IncrementalAdditive(DevelopmentBase):
     """ The Incremental Additive Method.
 
+    This estimator implements the additive method of Schmidt (2006), Section 4.7:
+    expected incremental losses satisfy ``E[Z_{i,k}] = eta_i * gamma_k``, where
+    ``eta_i`` is exposure (``sample_weight``, e.g. premium) for accident year
+    ``i`` and ``gamma_k`` is an incremental loss ratio at development age ``k``
+    that is common to all accident years. The fitted ``zeta_`` estimates those
+    common ``gamma_k``; unobserved incrementals are completed as
+    ``zeta_ * sample_weight``. Dollar ``incremental_`` differ by origin because
+    exposure differs; implied multiplicative ``ldf_`` are derived from the
+    completed incremental triangle and can also differ by origin.
+
     Parameters
     ----------
     trend: float (default=0.0)
-        A multiplicative trend amount used to trend each incremental development
-        period the valuation_date of the Triangle.
+        Implementation extension (not in Schmidt, 2006): multiplicative trend
+        applied to incremental losses before ``zeta_`` is estimated, trending
+        each development period to the triangle valuation date.
     future_trend: float (default=None)
-        The trend to apply to the incremental development periods in the lower
-        half of the completed Triangle. If None, then will be set to the value of
-        the trend parameter.
+        Implementation extension: trend applied when projecting incrementals
+        beyond the valuation date into the lower triangle. If None, uses
+        ``trend``.
     n_periods: integer, optional (default=-1)
         number of origin periods to be used in the ldf average calculation. For
         all origin periods, set n_periods=-1
@@ -54,12 +65,12 @@ class IncrementalAdditive(DevelopmentBase):
         The raw incrementals as a percent of exposure trended to the valuation
         date of the Triangle. Only those used in the fitting.
     zeta_: Triangle
-        The fitted incrementals as a percent of exposure trended to the valuation
-        date of the Triangle.
+        Fitted incremental loss ratios ``gamma_k`` (common across accident years)
+        as a percent of exposure, trended to the valuation date of the Triangle.
     cum_zeta_: Triangle
         The fitted cumulative percent of exposure trended to the valuation date of 
         the Triangle
-    w_: ndarray
+    w_ : ndarray
         The weight used in the zeta fitting
     w_tri_: Triangle
         Triangle of w_
@@ -69,6 +80,88 @@ class IncrementalAdditive(DevelopmentBase):
         A triangle of full incremental values.
 
 
+    Examples
+    --------
+    Schmidt (2006), Example F, uses the ``ia_sample`` triangle: cumulative
+    ``loss`` with latest ``exposure`` as ``sample_weight`` (premiums). Fitted
+    ``incremental_`` are dollars by origin and age; ``zeta_`` is one pattern
+    shared across origins; implied ``ldf_`` can still vary by origin.
+
+    .. testsetup::
+
+        import chainladder as cl
+
+    .. testcode::
+
+        import numpy as np
+
+        tri = cl.load_sample("ia_sample")
+        ia = cl.IncrementalAdditive().fit(
+            tri["loss"], sample_weight=tri["exposure"].latest_diagonal
+        )
+        print(np.round(ia.incremental_.values[0, 0, -1, :], 0))
+        print(np.round(ia.ldf_.values[0, 0, :3, :3], 4))
+
+    .. testoutput::
+
+        [1889. 1811. 1256. 1157.  740.  300.]
+        [[1.8531 1.3062 1.2332]
+         [1.8895 1.3191 1.2336]
+         [1.9233 1.3288 1.2301]]
+
+    A volume-weighted estimate of the common ``gamma_k`` across origins,
+    multiplied by latest exposure, reproduces the fitted incrementals in the
+    lower triangle (here at age 72), as in Schmidt's additive predictors.
+
+    .. testcode::
+
+        import numpy as np
+
+        tri = cl.load_sample("ia_sample")
+        ia = cl.IncrementalAdditive().fit(
+            tri["loss"], sample_weight=tri["exposure"].latest_diagonal
+        )
+        zeta = tri["loss"].cum_to_incr().sum("origin") / tri["exposure"].sum("origin")
+        projected = (
+            zeta.values[0, 0, 0, -1]
+            * tri["exposure"].latest_diagonal.values[0, 0, -1, 0]
+        )
+        fitted = ia.incremental_.values[0, 0, -1, -1]
+        print(np.isclose(projected, fitted))
+
+    .. testoutput::
+
+        True
+
+    The ``trend`` and ``future_trend`` parameters are not part of Schmidt
+    (2006); they are chainladder extensions for trending incrementals before
+    fitting ``zeta_`` and when projecting the lower triangle. The effect is
+    material on projected dollars (not on cumulative link-ratio semantics).
+
+    .. testcode::
+
+        import numpy as np
+
+        tri = cl.load_sample("ia_sample")
+        sw = tri["exposure"].latest_diagonal
+        base = cl.IncrementalAdditive().fit(tri["loss"], sample_weight=sw)
+        trended = cl.IncrementalAdditive(trend=0.02, future_trend=0.05).fit(
+            tri["loss"], sample_weight=sw
+        )
+        print(float(np.round(base.incremental_.values[0, 0, -1, -1], 0)))
+        print(float(np.round(trended.incremental_.values[0, 0, -1, -1], 0)))
+
+    .. testoutput::
+
+        300.0
+        383.0
+
+    References
+    ----------
+    Schmidt, K. (2006). Methods and Models of Loss Reserving Based on Run-Off
+    Triangles: A Unifying Survey. CAS Forum, Fall 2006, Section 4.7 (Additive
+    Method). https://www.casact.org/sites/default/files/database/forum_06fforum_273.pdf
+
     """
 
     def __init__(