Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
d111c92
Setting up geometric mean
kennethshsu May 14, 2026
9fa828e
Removed some useless stuff
kennethshsu May 15, 2026
85f19eb
Groundwork for implementation
kennethshsu May 15, 2026
6ae3f72
Flow control geometric vs not
kennethshsu May 15, 2026
7c1e4a1
Calculating gmean
kennethshsu May 15, 2026
e447647
Got geo_means link ratios now
kennethshsu May 15, 2026
163f4dd
Correct LDFs, but wrong dimension
kennethshsu May 16, 2026
11efa44
Corrected shape
kennethshsu May 16, 2026
60a2e01
Added column names
kennethshsu May 16, 2026
e882380
Matching dimension on ldf_ now
kennethshsu May 18, 2026
108e579
Added a new geometric test
kennethshsu May 18, 2026
c97eae1
Removed debugger
kennethshsu May 18, 2026
62f3188
Added simple average test
kennethshsu May 18, 2026
9233a9a
Revised to lastn instead of last4
kennethshsu May 18, 2026
ce967f4
Preping another blended test
kennethshsu May 18, 2026
d96e972
typo n
kennethshsu May 18, 2026
51a69ed
Added the flipped test
kennethshsu May 18, 2026
d44c377
added tests for sigma and std error to be nans
kennethshsu May 18, 2026
1e4e219
Commented out debugger
kennethshsu May 18, 2026
9f633bf
added explict else do nothing
kennethshsu May 18, 2026
e17ec5d
Took out incorrect reassignment of LDFs
kennethshsu May 18, 2026
2093bf8
need to fix std residuals
kennethshsu May 18, 2026
bffd01d
Added std residuals test
kennethshsu May 18, 2026
65c4aaa
Resetting std residuals to nans
kennethshsu May 18, 2026
31dbf82
Clean up
kennethshsu May 18, 2026
028cb7a
Cleaned up, improving code readiability
kennethshsu May 18, 2026
627eed4
Merge branch 'main' of https://github.com/casact/chainladder-python i…
kennethshsu May 18, 2026
bd546ac
Fixes bugbot
kennethshsu May 18, 2026
ab6b7ec
Debugger for weited regression
kennethshsu May 19, 2026
d07fbae
Generalizing the weighted regression through origin
kennethshsu May 19, 2026
f7a7927
Stash
kennethshsu May 20, 2026
78e382b
Debug
kennethshsu May 19, 2026
af071cb
Fixed w
kennethshsu May 20, 2026
9e25a26
Moving expoent into OLS
kennethshsu May 20, 2026
95d2407
Geometric averaging using regression framework
kennethshsu May 20, 2026
f98aa3d
debugger, stashing
kennethshsu May 20, 2026
eab7a97
Working geometric in regression form
kennethshsu May 20, 2026
9d1b506
Commenting out the old manual calculation
kennethshsu May 20, 2026
b212a15
Added in y weights
kennethshsu May 20, 2026
de1df8f
Removed the sigma, std error, and residuals tests, since we actually …
kennethshsu May 20, 2026
746fb25
Prettify code
kennethshsu May 20, 2026
71d9cbf
Cleaned up flow
kennethshsu May 20, 2026
8e5607b
line break
kennethshsu May 20, 2026
fa0f837
Merge branch 'main' of https://github.com/casact/chainladder-python i…
kennethshsu May 20, 2026
e1f5195
Resolving bugbot
kennethshsu May 20, 2026
8b57a68
Removed old stuff
kennethshsu May 20, 2026
54fdad6
Fixing the standard residual calculation
kennethshsu May 20, 2026
6cc256f
Added tests for sigma and other terms
kennethshsu May 20, 2026
2f80513
Missing new line
kennethshsu May 20, 2026
0f27cd6
removed the stupid whitespace
kennethshsu May 20, 2026
6e22290
Fixing the stderror error
kennethshsu May 21, 2026
742b896
Bugbot fix - thanks good bot
kennethshsu May 21, 2026
9f062a3
Remove stale weighted regression residual cache
cursoragent May 21, 2026
d9b1e8b
Added a geometric branch, else skip
kennethshsu May 21, 2026
079338c
Restoring is_geo
kennethshsu May 21, 2026
2737959
Merge branch 'main' of https://github.com/casact/chainladder-python i…
kennethshsu May 21, 2026
81cef7e
Added a warning for zeros
kennethshsu May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 15 additions & 29 deletions chainladder/development/development.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Callable,
Literal,
# Self, # Make use of this once Python 3.10 is deprecated.
TYPE_CHECKING
TYPE_CHECKING,
)

if TYPE_CHECKING:
Expand All @@ -36,7 +36,7 @@ class Development(DevelopmentBase):
all origin periods, set n_periods = -1
average: string or float, optional (default = 'volume')
type of averaging to use for ldf average calculation. Options include
'volume', 'simple', and 'regression'. If numeric values are supplied,
'volume', 'simple', 'regression', and 'geometric'. If numeric values are supplied,
then (2-average) in the style of Zehnwirth & Barnett is used
for the exponent of the regression weights.
sigma_interpolation: string optional (default = 'log-linear')
Expand Down Expand Up @@ -92,8 +92,8 @@ class Development(DevelopmentBase):
def __init__(
self,
n_periods: int = -1,
average: Literal['volume', 'simple', 'regression'] = "volume",
sigma_interpolation: Literal['log-linear', 'mack'] = "log-linear",
average: Literal["volume", "simple", "regression", "geometric"] = "volume",
Comment thread
henrydingliu marked this conversation as resolved.
sigma_interpolation: Literal["log-linear", "mack"] = "log-linear",
drop: tuple | list[tuple] | None = None,
drop_high: bool | int | list[bool] | list[int] | None = None,
drop_low: bool | int | list[bool] | list[int] | None = None,
Expand All @@ -120,11 +120,7 @@ def __init__(
# Undeclared until fitted attributes - scikit-learn convention.
self.average_: np.ndarray

def fit(
self, X: TriangleLike,
y: None = None,
sample_weight: None = None
):
def fit(self, X: TriangleLike, y: None = None, sample_weight: None = None):
"""Fit the model with X.

Parameters
Expand Down Expand Up @@ -164,44 +160,36 @@ def fit(
x: ArrayLike
y: ArrayLike
x, y = tri_array[..., :-1], tri_array[..., 1:]
exponent: ArrayLike = xp.array(
[{"regression": 0, "volume": 1, "simple": 2}[x] for x in average_[0, 0, 0]]
)
exponent = xp.nan_to_num(exponent * (y * 0 + 1))

link_ratio: ArrayLike = y / x

if hasattr(X, "w_v2_"):
self.w_v2_ = self._set_weight_func(
factor=obj.age_to_age * X.w_v2_,
# secondary_rank=obj.iloc[..., :-1, :-1]
)
else:
self.w_v2_ = self._set_weight_func(
factor=obj.age_to_age,
# secondary_rank=obj.iloc[..., :-1, :-1]
)

self.w_ = self._assign_n_periods_weight(
obj, n_periods_
) * self._drop_adjustment(obj, link_ratio)
w = num_to_nan(self.w_ / (x ** (exponent)))

params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(x, y, w)
params = WeightedRegression(axis=2, thru_orig=True, xp=xp).fit(
x, y, self.w_, average_
)
Comment thread
cursor[bot] marked this conversation as resolved.

if self.n_periods != 1:
params = params.sigma_fill(self.sigma_interpolation)
params.sigma_fill(self.sigma_interpolation).std_err_fill()
w_reg = params._w_reg
else:
warnings.warn(
"Setting n_periods=1 does not allow enough degrees "
"of freedom to support calculation of all regression"
" statistics. Only LDFs have been calculated."
Comment thread
cursor[bot] marked this conversation as resolved.
"of freedom to support calculation of all regression "
"statistics. Only LDFs have been calculated."
)

params.std_err_ = xp.nan_to_num(params.std_err_) + xp.nan_to_num(
(1 - xp.nan_to_num(params.std_err_ * 0 + 1))
Comment thread
cursor[bot] marked this conversation as resolved.
* params.sigma_
/ xp.swapaxes(xp.sqrt(x ** (2 - exponent))[..., 0:1, :], -1, -2)
)
w_reg = params._w_reg

params = xp.concatenate((params.slope_, params.sigma_, params.std_err_), 3)
params = xp.swapaxes(params, 2, 3)
Expand All @@ -211,14 +199,12 @@ def fit(
self.std_err_ = self._param_property(obj, params, 2)

resid = -obj.iloc[..., :-1] * self.ldf_.values + obj.iloc[..., 1:].values
std = xp.sqrt((1 / num_to_nan(w)) * (self.sigma_**2).values)
std = xp.sqrt((1 / num_to_nan(w_reg)) * (self.sigma_**2).values)
resid = resid / num_to_nan(std)
self.std_residuals_ = resid[resid.valuation < obj.valuation_date].fillzero()

return self



def transform(self, X):
"""If X and self are of different shapes, align self to X, else
return self.
Expand Down
262 changes: 259 additions & 3 deletions chainladder/development/tests/test_development.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,7 @@ def test_new_drop_7(clrd):
clrd = clrd.groupby("LOB")[["IncurLoss", "CumPaidLoss"]].sum()
# drop_above/below with preserve
with pytest.warns(UserWarning, match="exclusions have been ignored"):
dev = cl.Development(
drop_above=1.01, drop_below=0.95, preserve=3
).fit(clrd)
dev = cl.Development(drop_above=1.01, drop_below=0.95, preserve=3).fit(clrd)
compare_new_drop(dev, clrd)


Expand Down Expand Up @@ -393,6 +391,264 @@ def test_new_drop_10():
)


def test_geometric_avg():
    """Check ``average="geometric"`` against a hand-computed geometric mean.

    The reference value for each column of the link-ratio frame is the
    geometric mean of its last four non-null entries, matching the
    ``n_periods=4`` passed to ``Development``.
    """
    n_periods = 4
    tri = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]

    def geo_lastn(s, n):
        # Geometric mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.prod() ** (1 / len(vals)) if len(vals) > 0 else np.nan

    expected = (
        tri.link_ratio.to_frame()
        .apply(lambda s: geo_lastn(s, n_periods))
        .values.flatten()
    )
    actual = (
        cl.Development(n_periods=n_periods, average="geometric")
        .fit_transform(tri)
        .ldf_.to_frame()
        .values.flatten()
    )

    # Compare with an absolute tolerance instead of rounding both sides and
    # testing equality: two values that agree to many digits can still round
    # to different 6-decimal results when they straddle a rounding boundary.
    # equal_nan=False keeps the original strictness (any NaN fails the test).
    np.testing.assert_allclose(actual, expected, rtol=0, atol=1e-6, equal_nan=False)


def test_simple_avg():
    """Check ``average="simple"`` against a hand-computed arithmetic mean.

    The reference value for each column of the link-ratio frame is the
    arithmetic mean of its last four non-null entries, matching the
    ``n_periods=4`` passed to ``Development``.
    """
    n_periods = 4
    tri = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]

    def sim_lastn(s, n):
        # Arithmetic mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.mean() if len(vals) > 0 else np.nan

    expected = (
        tri.link_ratio.to_frame()
        .apply(lambda s: sim_lastn(s, n_periods))
        .values.flatten()
    )
    actual = (
        cl.Development(n_periods=n_periods, average="simple")
        .fit_transform(tri)
        .ldf_.to_frame()
        .values.flatten()
    )

    # Tolerance comparison rather than round-then-equal: rounding two nearly
    # identical floats can yield different 6-decimal values at a boundary.
    # equal_nan=False keeps the original strictness (any NaN fails the test).
    np.testing.assert_allclose(actual, expected, rtol=0, atol=1e-6, equal_nan=False)


def test_simple_geometric_avg():
    """Check a per-column blend of averages, starting with ``"geometric"``.

    ``Development`` is given a list of nine averaging methods alternating
    between ``"geometric"`` and ``"simple"``. Each resulting LDF is compared
    with the matching hand-computed mean of the last four non-null link
    ratios in the corresponding column of the link-ratio frame.
    """
    n_periods = 4
    # Single source of truth for the method order: used both as the model
    # argument and to pick the reference value per column.
    methods = ["geometric", "simple"] * 4 + ["geometric"]

    tri = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]
    df = tri.link_ratio.to_frame()

    def sim_lastn(s, n):
        # Arithmetic mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.mean() if len(vals) > 0 else np.nan

    def geo_lastn(s, n):
        # Geometric mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.prod() ** (1 / len(vals)) if len(vals) > 0 else np.nan

    sim_avg = df.apply(lambda s: sim_lastn(s, n_periods))
    geo_avg = df.apply(lambda s: geo_lastn(s, n_periods))

    use_geometric = np.array(methods) == "geometric"
    expected = np.where(use_geometric, geo_avg.values, sim_avg.values)

    actual = (
        cl.Development(n_periods=n_periods, average=methods)
        .fit_transform(tri)
        .ldf_.to_frame()
        .values.flatten()
    )

    # Tolerance comparison rather than round-then-equal: rounding two nearly
    # identical floats can yield different 6-decimal values at a boundary.
    # equal_nan=False keeps the original strictness (any NaN fails the test).
    np.testing.assert_allclose(actual, expected, rtol=0, atol=1e-6, equal_nan=False)


def test_simple_geometric_avg2():
    """Check a per-column blend of averages, starting with ``"simple"``.

    Mirror image of the geometric-first blend test: ``Development`` is given
    a list of nine averaging methods alternating between ``"simple"`` and
    ``"geometric"``. Each resulting LDF is compared with the matching
    hand-computed mean of the last four non-null link ratios in the
    corresponding column of the link-ratio frame.
    """
    n_periods = 4
    # Single source of truth for the method order: used both as the model
    # argument and to pick the reference value per column.
    methods = ["simple", "geometric"] * 4 + ["simple"]

    tri = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]
    df = tri.link_ratio.to_frame()

    def sim_lastn(s, n):
        # Arithmetic mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.mean() if len(vals) > 0 else np.nan

    def geo_lastn(s, n):
        # Geometric mean of the last ``n`` non-null values; NaN when none exist.
        vals = s.dropna().tail(n)
        return vals.prod() ** (1 / len(vals)) if len(vals) > 0 else np.nan

    sim_avg = df.apply(lambda s: sim_lastn(s, n_periods))
    geo_avg = df.apply(lambda s: geo_lastn(s, n_periods))

    use_geometric = np.array(methods) == "geometric"
    expected = np.where(use_geometric, geo_avg.values, sim_avg.values)

    actual = (
        cl.Development(n_periods=n_periods, average=methods)
        .fit_transform(tri)
        .ldf_.to_frame()
        .values.flatten()
    )

    # Tolerance comparison rather than round-then-equal: rounding two nearly
    # identical floats can yield different 6-decimal values at a boundary.
    # equal_nan=False keeps the original strictness (any NaN fails the test).
    np.testing.assert_allclose(actual, expected, rtol=0, atol=1e-6, equal_nan=False)


def test_sigma():
    """Pin ``sigma_`` of a 4-period simple-average fit to known values.

    The fitted ``sigma_`` is flattened, rounded to six decimals and compared
    element-wise with hard-coded reference numbers.
    """
    triangle = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]
    model = cl.Development(n_periods=4, average="simple")
    fitted = model.fit_transform(triangle)

    observed = np.round(fitted.sigma_.to_frame().values.flatten(), 6)
    reference = [
        0.006371, 0.001693, 0.001274,
        0.001823, 0.000612, 0.000349,
        0.000371, 0.000212, 0.000128,
    ]

    matches = observed == reference
    assert np.all(matches)


def test_stderror():
    """Pin ``std_err_`` of a 4-period simple-average fit to known values.

    The fitted ``std_err_`` is flattened, rounded to six decimals and compared
    element-wise with hard-coded reference numbers.
    """
    triangle = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]
    model = cl.Development(n_periods=4, average="simple")
    fitted = model.fit_transform(triangle)

    observed = np.round(fitted.std_err_.to_frame().values.flatten(), 6)
    reference = [
        0.003186, 0.000847, 0.000637,
        0.000912, 0.000306, 0.000175,
        0.000214, 0.00015, 0.000128,
    ]

    matches = observed == reference
    assert np.all(matches)


def test_std_residuals():
    """Pin ``std_residuals_`` of a 4-period simple-average fit to known values.

    The fitted residual frame is rounded to six decimals and compared with a
    hard-coded 9x9 grid; NaN cells must line up with NaN cells.
    """
    nan = np.nan  # short alias keeps the reference grid readable

    triangle = cl.load_sample("friedland_us_industry_auto")["Reported Claims"]
    model = cl.Development(n_periods=4, average="simple")
    fitted = model.fit_transform(triangle)
    observed = np.round(fitted.std_residuals_.to_frame().values, 6)

    reference = [
        [0.0, 0.0, 0.0, 0.0, 0.0, -1.342157, -1.144874, 0.707107, 0.0],
        [0.0, 0.0, 0.0, 0.0, -0.847023, 0.576855, 0.702623, -0.707107, nan],
        [0.0, 0.0, 0.0, -0.74519, 1.213508, 0.917912, 0.442251, nan, nan],
        [0.0, 0.0, -0.251713, 1.337416, 0.426182, -0.15261, nan, nan, nan],
        [0.0, 1.426642, 1.023636, 0.194113, -0.792667, nan, nan, nan, nan],
        [-0.19898, -0.056297, 0.505912, -0.78634, nan, nan, nan, nan, nan],
        [-0.727376, -0.791472, -1.277835, nan, nan, nan, nan, nan, nan],
        [-0.537388, -0.578874, nan, nan, nan, nan, nan, nan, nan],
        [1.463744, nan, nan, nan, nan, nan, nan, nan, nan],
    ]

    assert np.array_equal(observed, reference, equal_nan=True)


def compare_new_drop(dev, tri):
assert np.array_equal(
dev._set_weight_func(tri.age_to_age, tri.age_to_age).values,
Expand Down
Loading
Loading