diff --git a/chainladder/development/barnzehn.py b/chainladder/development/barnzehn.py index 20ae0b79..7a1580bc 100644 --- a/chainladder/development/barnzehn.py +++ b/chainladder/development/barnzehn.py @@ -15,6 +15,12 @@ class BarnettZehnwirth(TweedieGLM): """ This estimator enables modeling from the Probabilistic Trend Family as described by Barnett and Zehnwirth. + The model is fit on log-incremental losses and produces multiplicative + ``ldf_`` patterns for use with IBNR estimators. Specify the regression + structure either with a patsy ``formula`` or with PTF period groupings + (``alpha``, ``gamma``, ``iota``) that group origin, development, and + valuation periods, respectively. + .. versionadded:: 0.8.2 Parameters @@ -33,6 +39,50 @@ class BarnettZehnwirth(TweedieGLM): gamma: list of int iota: list of int + Examples + -------- + When many accident years are available but you want a smaller number of + origin cohorts, specify ``alpha``, ``gamma``, and ``iota`` instead of a + separate factor for every year. The fitted design has fewer parameters than + a fully saturated origin-by-development formula on the same triangle. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("abc") + m_ptf = cl.BarnettZehnwirth( + alpha=[0, 5], gamma=[0, 2, 5], iota=[0, 7, 11] + ).fit(tri) + m_full = cl.BarnettZehnwirth( + formula="C(origin)+C(development)" + ).fit(tri) + print(len(m_ptf.coef_.values.flatten())) + print(len(m_full.coef_.values.flatten())) + + .. testoutput:: + + 6 + 21 + + Use a patsy ``formula`` when the reserving structure needs explicit terms + (for example separate origin and development factors) rather than the PTF + cohort shorthand. + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("abc") + m = cl.BarnettZehnwirth(formula="C(origin)+C(development)").fit(tri) + print(np.round(m.ldf_.values[0, 0, :4, 0], 4)) + + .. 
testoutput:: + + [2.2854 2.2854 2.2854 2.2854] + """ def __init__(self, drop=None,drop_valuation=None,formula=None, response=None, alpha=None, gamma=None, iota=None): diff --git a/chainladder/development/glm.py b/chainladder/development/glm.py index c44bb0d5..9db21142 100644 --- a/chainladder/development/glm.py +++ b/chainladder/development/glm.py @@ -11,12 +11,21 @@ class TweedieGLM(DevelopmentBase): - """ This estimator creates development patterns with a GLM using a Tweedie distribution. + """ GLM reserving with scikit-learn's Tweedie distribution. - The Tweedie family includes several of the more popular distributions including - the normal, ODP poisson, and gamma distributions. This class is a special case - of `DevleopmentML`. It restricts to just GLM using a TweedieRegressor and - provides an R-like formulation of the design matrix. + Implements the GLM reserving structure of Taylor and McGuire. The Tweedie + family covers normal, ODP Poisson, gamma, and related targets via ``power`` + and ``link``. Covariates from any triangle axis can enter through a patsy + ``design_matrix`` while staying close to traditional chainladder methods when + origin and development are coded categorically. + + Triangles are converted to long-format tables internally (as with + ``Triangle.to_frame(keepdims=True)``); origin periods are restated as years + from the earliest origin for sklearn compatibility, and the response is + converted to an incremental basis before fitting. This class is a special + case of :class:`~chainladder.DevelopmentML` that uses only + :class:`~sklearn.linear_model.TweedieRegressor` behind a + :class:`~chainladder.utils.utility_functions.PatsyFormula` step. .. versionadded:: 0.8.1 @@ -29,7 +38,7 @@ class TweedieGLM(DevelopmentBase): design_matrix: formula-like A patsy formula describing the independent variables, X of the GLM response: str - Column name for the reponse variable of the GLM. 
If ommitted, then the + Column name for the response variable of the GLM. If omitted, then the first column of the Triangle will be used. power: float, default=1 The power determines the underlying target distribution according @@ -76,6 +85,87 @@ class TweedieGLM(DevelopmentBase): ---------- model_: sklearn.Pipeline A scikit-learn Pipeline of the GLM + + Examples + -------- + Volume-weighted chainladder development can be replicated with a + Poisson-log GLM on incremental paid losses: categorical origin and + development in ``design_matrix``, ``power=1``, and ``link='log'``. The + resulting ``ldf_`` matches :class:`~chainladder.Development` closely on + ``genins``. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("genins") + odp = cl.TweedieGLM( + design_matrix="C(development) + C(origin)", + power=1, + link="log", + ).fit(tri) + trad = cl.Development().fit(tri) + print(round(float(odp.ldf_.values[0, 0, 0, 0]), 4)) + print(round(float(trad.ldf_.values[0, 0, 0, 0]), 4)) + print(np.round(odp.ldf_.values[0, 0, :4, 0], 4)) + + .. testoutput:: + + 3.491 + 3.4906 + [3.491 3.491 3.491 3.491] + + Patsy R-style formulas set ``design_matrix``; continuous ``development`` + and ``origin`` terms yield a small coefficient table via ``coef_``. + + .. testcode:: + + tri = cl.load_sample("genins") + glm = cl.TweedieGLM(design_matrix="development + origin").fit(tri) + print(len(glm.coef_)) + print(round(float(glm.coef_.iloc[0, 0]), 6)) + print(round(float(glm.coef_.iloc[1, 0]), 6)) + + .. testoutput:: + + 3 + 13.516322 + -0.006251 + + On multi-LOB triangles, interaction terms can keep the model parsimonious + (10 coefficients here versus 18+ in a full categorical chainladder). The + percent difference in ``cdf_`` versus :class:`~chainladder.Development` + stays within about 1% at each ultimate lag: + + .. 
testcode:: + + import numpy as np + + clrd = cl.load_sample("clrd")["CumPaidLoss"].groupby("LOB").sum() + clrd = clrd[clrd["LOB"].isin(["ppauto", "comauto"])] + dev = cl.TweedieGLM( + design_matrix=( + "LOB+LOB:C(np.minimum(development, 36))" + "+LOB:development+LOB:origin" + ), + max_iter=1000, + ).fit(clrd) + trad = cl.Development().fit(clrd) + pct = ((dev.cdf_.iloc[..., 0, :] / trad.cdf_) - 1).to_frame().round(3) + print(len(dev.coef_)) + print(np.round(pct.loc["comauto"].values, 3)) + print(np.round(pct.loc["ppauto"].values, 3)) + + .. testoutput:: + + 10 + [ 0.002 0.003 -0.01 0.003 0.011 0.008 0.005 -0. -0.002] + [ 0.006 0.003 -0. 0.001 0.002 0.001 0.001 0.001 0.001] + """ def __init__(self, design_matrix='C(development) + C(origin)', diff --git a/chainladder/development/incremental.py b/chainladder/development/incremental.py index fb0537e0..d1801342 100644 --- a/chainladder/development/incremental.py +++ b/chainladder/development/incremental.py @@ -11,15 +11,26 @@ class IncrementalAdditive(DevelopmentBase): """ The Incremental Additive Method. + This estimator implements the additive method of Schmidt (2006), Section 4.7: + expected incremental losses satisfy ``E[Z_{i,k}] = eta_i * gamma_k``, where + ``eta_i`` is exposure (``sample_weight``, e.g. premium) for accident year + ``i`` and ``gamma_k`` is an incremental loss ratio at development age ``k`` + that is common to all accident years. The fitted ``zeta_`` estimates those + common ``gamma_k``; unobserved incrementals are completed as + ``zeta_ * sample_weight``. Dollar ``incremental_`` differ by origin because + exposure differs; implied multiplicative ``ldf_`` are derived from the + completed incremental triangle and can also differ by origin. + Parameters ---------- trend: float (default=0.0) - A multiplicative trend amount used to trend each incremental development - period the valuation_date of the Triangle. 
+ Implementation extension (not in Schmidt, 2006): multiplicative trend + applied to incremental losses before ``zeta_`` is estimated, trending + each development period to the triangle valuation date. future_trend: float (default=None) - The trend to apply to the incremental development periods in the lower - half of the completed Triangle. If None, then will be set to the value of - the trend parameter. + Implementation extension: trend applied when projecting incrementals + beyond the valuation date into the lower triangle. If None, uses + ``trend``. n_periods: integer, optional (default=-1) number of origin periods to be used in the ldf average calculation. For all origin periods, set n_periods=-1 @@ -54,12 +65,12 @@ class IncrementalAdditive(DevelopmentBase): The raw incrementals as a percent of exposure trended to the valuation date of the Triangle. Only those used in the fitting. zeta_: Triangle - The fitted incrementals as a percent of exposure trended to the valuation - date of the Triangle. + Fitted incremental loss ratios ``gamma_k`` (common across accident years) + as a percent of exposure, trended to the valuation date of the Triangle. cum_zeta_: Triangle The fitted cumulative percent of exposure trended to the valuation date of the Triangle - w_: ndarray + w_ : ndarray The weight used in the zeta fitting w_tri_: Triangle Triangle of w_ @@ -69,6 +80,88 @@ class IncrementalAdditive(DevelopmentBase): A triangle of full incremental values. + Examples + -------- + Schmidt (2006), Example F, uses the ``ia_sample`` triangle: cumulative + ``loss`` with latest ``exposure`` as ``sample_weight`` (premiums). Fitted + ``incremental_`` are dollars by origin and age; ``zeta_`` is one pattern + shared across origins; implied ``ldf_`` can still vary by origin. + + .. testsetup:: + + import chainladder as cl + + .. 
testcode:: + + import numpy as np + + tri = cl.load_sample("ia_sample") + ia = cl.IncrementalAdditive().fit( + tri["loss"], sample_weight=tri["exposure"].latest_diagonal + ) + print(np.round(ia.incremental_.values[0, 0, -1, :], 0)) + print(np.round(ia.ldf_.values[0, 0, :3, :3], 4)) + + .. testoutput:: + + [1889. 1811. 1256. 1157. 740. 300.] + [[1.8531 1.3062 1.2332] + [1.8895 1.3191 1.2336] + [1.9233 1.3288 1.2301]] + + A volume-weighted estimate of the common ``gamma_k`` across origins, + multiplied by latest exposure, reproduces the fitted incrementals in the + lower triangle (here at age 72), as in Schmidt's additive predictors. + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("ia_sample") + ia = cl.IncrementalAdditive().fit( + tri["loss"], sample_weight=tri["exposure"].latest_diagonal + ) + zeta = tri["loss"].cum_to_incr().sum("origin") / tri["exposure"].sum("origin") + projected = ( + zeta.values[0, 0, 0, -1] + * tri["exposure"].latest_diagonal.values[0, 0, -1, 0] + ) + fitted = ia.incremental_.values[0, 0, -1, -1] + print(np.isclose(projected, fitted)) + + .. testoutput:: + + True + + The ``trend`` and ``future_trend`` parameters are not part of Schmidt + (2006); they are chainladder extensions for trending incrementals before + fitting ``zeta_`` and when projecting the lower triangle. They change the + projected dollars materially (300 vs. 383 below), not how ``ldf_`` is read. + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("ia_sample") + sw = tri["exposure"].latest_diagonal + base = cl.IncrementalAdditive().fit(tri["loss"], sample_weight=sw) + trended = cl.IncrementalAdditive(trend=0.02, future_trend=0.05).fit( + tri["loss"], sample_weight=sw + ) + print(float(np.round(base.incremental_.values[0, 0, -1, -1], 0))) + print(float(np.round(trended.incremental_.values[0, 0, -1, -1], 0))) + + .. testoutput:: + + 300.0 + 383.0 + + References + ---------- + Schmidt, K. (2006). 
Methods and Models of Loss Reserving Based on Run-Off + Triangles: A Unifying Survey. CAS Forum, Fall 2006, Section 4.7 (Additive + Method). https://www.casact.org/sites/default/files/database/forum_06fforum_273.pdf + """ def __init__( diff --git a/chainladder/development/learning.py b/chainladder/development/learning.py index 041a9abc..29f9f1c4 100644 --- a/chainladder/development/learning.py +++ b/chainladder/development/learning.py @@ -12,27 +12,32 @@ class DevelopmentML(DevelopmentBase): - """ A Estimator that interfaces with machine learning (ML) tools that implement - the scikit-learn API. + """ Interface to scikit-learn estimators for loss development patterns. - The `DevelopmentML` estimator is used to generate ``ldf_`` patterns from - the data. + ``DevelopmentML`` lets reserving workflows use any sklearn-compatible + regressor (often inside a :class:`~sklearn.pipeline.Pipeline`). It converts + a :class:`~chainladder.Triangle` to a tabular design matrix, fits the ML + model, predicts through the terminal development age to complete the lower + triangle, and expresses the result as ``ldf_`` for tails and IBNR methods. + :class:`~chainladder.TweedieGLM` is a special case with + :class:`~sklearn.linear_model.TweedieRegressor` as the only ML step. .. versionadded:: 0.8.1 Parameters ---------- - estimator_ml: skearn Estimator + estimator_ml: sklearn Estimator Any sklearn compatible regression estimator, including Pipelines and y_ml: list or str or sklearn_transformer The response column(s) for the machine learning algorithm. It must be present within the Triangle. - autoregressive: tuple, (autoregressive_col_name, lag, source_col_name) - The subset of response column(s) to use as lagged features for the - Time Series aspects of the model. Predictions from one development period - get used as featues in the next development period. Lags should be negative - integers. + autoregressive: list of tuple + Each tuple is ``(feature_name, lag, source_column)``. 
``feature_name`` must + also appear in the pipeline design matrix. ``DevelopmentML`` fills that + column with lagged ``source_column`` values and, when projecting forward, + replaces it with the prior development period's prediction. Lags should be + negative integers (for example ``-12`` on a monthly triangle is one year). weight_step: str Step name within estimator_ml that is weighted drop: tuple or list of tuples @@ -50,6 +55,118 @@ class DevelopmentML(DevelopmentBase): The estimated loss development patterns. cdf_: Triangle The estimated cumulative development patterns. + + Examples + -------- + Features from any triangle axis can enter an sklearn + :class:`~sklearn.compose.ColumnTransformer` or + :class:`~sklearn.pipeline.Pipeline`. On ``clrd`` grouped by line of business, + one-hot-encode ``LOB`` and ``development``, pass ``origin`` through, and fit + a linear model (the user guide uses ``RandomForestRegressor`` the same way). + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + import numpy as np + from sklearn.compose import ColumnTransformer + from sklearn.linear_model import LinearRegression + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import OneHotEncoder + + clrd = cl.load_sample("clrd").groupby("LOB").sum()["CumPaidLoss"] + design_matrix = ColumnTransformer( + transformers=[ + ("dummy", OneHotEncoder(drop="first"), ["LOB", "development"]), + ("passthrough", "passthrough", ["origin"]), + ] + ) + estimator_ml = Pipeline( + steps=[ + ("design_matrix", design_matrix), + ("model", LinearRegression()), + ] + ) + m = cl.DevelopmentML(estimator_ml=estimator_ml, y_ml="CumPaidLoss").fit( + clrd + ) + print(m.ldf_.shape) + print(np.round(m.ldf_.values[0, 0, 0, :4], 4)) + + .. testoutput:: + + (6, 1, 10, 9) + [1.7448 0.9854 0.8117 0.6495] + + ``fit_incrementals`` chooses whether the ML response is built from + incremental or cumulative triangle values before ``ldf_`` is derived. + + .. 
testcode:: + + import numpy as np + from sklearn.linear_model import LinearRegression + from sklearn.pipeline import Pipeline + + from chainladder.utils.utility_functions import PatsyFormula + + tri = cl.load_sample("genins") + pipe = Pipeline( + steps=[ + ("design_matrix", PatsyFormula("C(development)")), + ("model", LinearRegression(fit_intercept=False)), + ] + ) + m_incr = cl.DevelopmentML( + pipe, y_ml=[tri.columns[0]], fit_incrementals=True + ).fit(tri) + m_cum = cl.DevelopmentML( + pipe, y_ml=[tri.columns[0]], fit_incrementals=False + ).fit(tri) + print(float(np.round(m_incr.ldf_.values[0, 0, 0, 0], 4))) + print(float(np.round(m_cum.ldf_.values[0, 0, 0, 0], 4))) + + .. testoutput:: + + 3.508 + 3.515 + + Autoregressive features use prior development predictions as covariates. + The lag column must be named in both ``autoregressive`` and the pipeline + (for example in a :class:`~chainladder.PatsyFormula`). + + .. testcode:: + + import numpy as np + from sklearn.linear_model import LinearRegression + from sklearn.pipeline import Pipeline + + from chainladder.utils.utility_functions import PatsyFormula + + tri = cl.load_sample("raa") + col = tri.columns[0] + pipe = Pipeline( + steps=[ + ( + "design_matrix", + PatsyFormula("C(development) + pred_lag"), + ), + ("model", LinearRegression(fit_intercept=False)), + ] + ) + m = cl.DevelopmentML( + pipe, + y_ml=col, + fit_incrementals=True, + autoregressive=[("pred_lag", -12, col)], + ).fit(tri) + print(float(np.round(m.ldf_.values[0, 0, 0, 0], 4))) + + .. 
testoutput:: + + 3.0297 + """ def __init__(self, estimator_ml=None, y_ml=None, autoregressive=False, @@ -238,4 +355,4 @@ def transform(self, X): X_new.ldf_.valuation_date = pd.to_datetime(options.ULT_VAL) X_new._set_slicers() X_new.predicted_data_ = predicted_data - return X_new \ No newline at end of file + return X_new diff --git a/chainladder/development/outstanding.py b/chainladder/development/outstanding.py index ffaa5528..4099eca0 100644 --- a/chainladder/development/outstanding.py +++ b/chainladder/development/outstanding.py @@ -11,15 +11,18 @@ class CaseOutstanding(DevelopmentBase): - """A determinisic method based on outstanding case reserves. + """ Deterministic development from prior-lag case reserves. - The CaseOutstanding method is a deterministic approach that develops - patterns of incremental payments as a percent of previous period case - reserves as well as patterns for case reserves as a percent of previous - period case reserves. Although the patterns produces by the approach - approximate incremental payments and case outstanding, they are converted - into comparable multiplicative patterns for usage with the various IBNR - methods. + Estimates incremental paid amounts and case-reserve runoff as fractions of + the prior lag's carried case reserve. Like + :class:`~chainladder.MunichAdjustment` and + :class:`~chainladder.BerquistSherman`, this is useful when case reserves + should inform paid ultimates. A triangle with both paid and incurred columns + is required. + + The incremental ``paid_ldf_`` patterns are not multiplicative link ratios; + the estimator also builds origin-specific implied multiplicative ``ldf_`` + so standard IBNR methods can be applied. .. versionadded:: 0.8.0 @@ -33,22 +36,84 @@ class CaseOutstanding(DevelopmentBase): all origin periods, set paid_n_periods=-1 case_n_periods: integer, optional (default=-1) number of origin periods to be used in the case pattern averages. 
For - all origin periods, set paid_n_periods=-1 + all origin periods, set case_n_periods=-1 Attributes ---------- ldf_: Triangle - The estimated (multiplicative) loss development patterns. + Implied multiplicative loss development patterns (by paid/incurred + column); each origin period has its own pattern. cdf_: Triangle The estimated (multiplicative) cumulative development patterns. case_to_prior_case_: Triangle - The case to prior case ratios used for fitting the estimator - case_ldf_: - The selected case to prior case ratios of the fitted estimator + Case-to-prior-case incremental ratios by origin (for review). + case_ldf_: Triangle + Selected case-to-prior-case ratios averaged across origins. paid_to_prior_case_: Triangle - The paid to prior case ratios used for fitting the estimator - paid_ldf_: - The selected paid to prior case ratios of the fitted estimator + Paid-to-prior-case incremental ratios by origin (for review). + paid_ldf_: Triangle + Selected paid-to-prior-case ratios averaged across origins. + + Examples + -------- + On ``usauto``, incremental paid in 12–24 is about 84% of case outstanding + at lag 12 (first entry in ``paid_ldf_`` at development 24–36): + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("usauto") + model = cl.CaseOutstanding( + paid_to_incurred=("paid", "incurred") + ).fit(tri) + print(np.round(model.paid_ldf_.values[0, 0, 0, :4], 4)) + + .. testoutput:: + + [0.8428 0.71 0.7084 0.6968] + + Implied multiplicative ``ldf_`` differ by accident year; the 1998 origin + paid pattern is shown below (compare to volume-weighted chainladder). + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("usauto") + model = cl.CaseOutstanding( + paid_to_incurred=("paid", "incurred") + ).fit(tri) + print(np.round(model.ldf_["paid"].values[0, 0, 0, :4], 4)) + + .. 
testoutput:: + + [1.7925 1.2056 1.0956 1.0457] + + Review origin-level ``paid_to_prior_case_`` and ``case_to_prior_case_`` + when tuning ``paid_n_periods`` and ``case_n_periods``; fitted selections + appear in ``paid_ldf_`` and ``case_ldf_``. + + .. testcode:: + + import numpy as np + + tri = cl.load_sample("usauto") + model = cl.CaseOutstanding( + paid_to_incurred=("paid", "incurred") + ).fit(tri) + print(np.round(model.case_to_prior_case_.values[0, 0, 0, :4], 4)) + print(np.round(model.case_ldf_.values[0, 0, 0, :4], 4)) + + .. testoutput:: + + [0.5378 0.5541 0.5253 0.4981] + [0.534 0.5638 0.5296 0.49 ] + """ def __init__( @@ -65,7 +130,7 @@ def fit(self, X, y=None, sample_weight=None): Parameters ---------- X : Triangle - Set of LDFs to which the munich adjustment will be applied. + Triangle with paid and incurred columns for ``paid_to_incurred``. y : Ignored sample_weight : Ignored diff --git a/chainladder/methods/mack.py b/chainladder/methods/mack.py index cfcf71e0..f2d9466b 100644 --- a/chainladder/methods/mack.py +++ b/chainladder/methods/mack.py @@ -42,21 +42,22 @@ class MackChainladder(Chainladder): Examples -------- - Fit the Mack chainladder method and inspect the headline summary table, - which combines the deterministic chainladder estimate with Mack's - stochastic standard error. + Use ``MackChainladder`` when the IBNR point estimate alone is not + sufficient and a measure of reserve uncertainty is also needed. + ``summary_`` shows the deterministic chainladder ultimate alongside + Mack's per-origin prediction error. - .. testsetup: + .. testsetup:: import chainladder as cl - .. testcode: + .. testcode:: tr = cl.load_sample('ukmotor') model = cl.MackChainladder().fit(tr) print(model.summary_) - .. testoutput: + .. 
testoutput:: Latest IBNR Ultimate Mack Std Err 2007 12690.0 NaN 12690.000000 NaN @@ -67,18 +68,37 @@ class MackChainladder(Chainladder): 2012 9650.0 7162.150646 16812.150646 693.166178 2013 6283.0 14396.919151 20679.919151 901.408385 - The deterministic chainladder ultimates match those of - :class:`Chainladder`. Mack's contribution is the stochastic standard error - in the rightmost column, which can be aggregated across origins. + ``total_mack_std_err_`` aggregates the prediction error across all + origins. It exceeds the quadrature sum of the per-origin errors in + ``summary_`` because parameter risk is correlated across origins: all + origins share the same estimated age-to-age factors. - .. testcode: + .. testcode:: print(model.total_mack_std_err_) - .. testoutput: + .. testoutput:: columns values (Total,) 1424.531543 + + The Mack standard error is sensitive to how the upstream development + factors were estimated. Using simple (unweighted) averaging in + :class:`Development` before fitting ``MackChainladder`` gives equal + weight to each accident year regardless of size. On a small triangle + this raises the aggregate standard error relative to volume weighting, + since thinner years contribute more uncertainty. + + .. testcode:: + + tr = cl.load_sample("ukmotor") + tr_simple = cl.Development(average="simple").fit_transform(tr) + print(cl.MackChainladder().fit(tr_simple).total_mack_std_err_) + + .. testoutput:: + + columns values + (Total,) 1591.603339 """ def fit(self, X, y=None, sample_weight=None): @@ -98,8 +118,10 @@ def fit(self, X, y=None, sample_weight=None): Examples -------- - Fitting attaches the ``ultimate_`` and Mack std error attributes to - the estimator and returns the estimator itself. + After fitting, ``ibnr_`` holds the point estimate per origin and + ``mack_std_err_`` holds the prediction error. The ratio of the two + gives a coefficient of variation that shows which origins carry the + most reserve uncertainty relative to their size. .. 
testsetup:: @@ -107,12 +129,24 @@ def fit(self, X, y=None, sample_weight=None): .. testcode:: + import numpy as np + tr = cl.load_sample('ukmotor') - cl.MackChainladder().fit(tr) + model = cl.MackChainladder().fit(tr) + print(model.ibnr_.to_frame(origin_as_datetime=False).round(1)) + print(np.round(model.mack_std_err_.values[0, 0, :, -1], 1)) .. testoutput:: - MackChainladder() + 2261 + 2007 NaN + 2008 350.9 + 2009 1037.5 + 2010 2044.9 + 2011 3663.4 + 2012 7162.2 + 2013 14396.9 + [ nan 27.2 36.5 144.5 427.6 693.2 901.4] """ super().fit(X, y, sample_weight) if "sigma_" not in self.X_: @@ -152,8 +186,11 @@ def predict(self, X, sample_weight=None): Examples -------- - Fit the model and apply it to a Triangle with the same shape, then - read the Mack standard error off the resulting Triangle. + ``predict`` re-applies the fitted age-to-age factors and sigma + estimates to a new triangle without refitting. A common use is + sensitivity testing: scale the reported losses by an adverse factor + and call ``predict`` to see how the Mack standard error responds, + holding the development pattern fixed. .. testsetup:: @@ -163,13 +200,16 @@ def predict(self, X, sample_weight=None): tr = cl.load_sample('ukmotor') model = cl.MackChainladder().fit(tr) - predicted = model.predict(tr) - print(predicted.total_mack_std_err_) + tr_adverse = tr * 1.05 + print(model.predict(tr).total_mack_std_err_) + print(model.predict(tr_adverse).total_mack_std_err_) .. testoutput:: columns values (Total,) 1424.531543 + columns values + (Total,) 1475.539173 """ X_new = super().predict(X, sample_weight) X_new.sigma_ = getattr(X_new, "sigma_", self.X_.sigma_) @@ -217,7 +257,7 @@ def full_std_err_(self): model = cl.MackChainladder().fit(tr) print(model.full_std_err_) - .. testoutput + .. testoutput:: 12 24 36 48 60 72 84 2007 0.047826 0.040745 0.031412 0.010337 0.001431 0.001523 0.0 @@ -268,6 +308,7 @@ def total_process_risk_(self): -------- .. testsetup:: + import chainladder as cl .. 
testcode:: @@ -340,9 +381,11 @@ def mack_std_err_(self): error per origin. .. testsetup:: + import chainladder as cl .. testcode:: + tr = cl.load_sample('ukmotor') model = cl.MackChainladder().fit(tr) print(model.mack_std_err_.iloc[..., -3:, -3:])