Skip to content

Commit 238db22

Browse files
danielfong-actdanielfong-act
andauthored
Barnett Zehnwirth documentation and formulation rework (#657)
* Changed basic formulation of PTF in development.ipynb to correspond with implementation. Added note about difference in formulation. Fixed valuation (iota) summation term. * Added documentation for utils.utility_functions.PTF_formula with example from 2008 paper. Updated reference list * changes how development and valuation bounds are given to PTF_formula. More changes to BZ documentation. * reworked pathological BZ example (no valuation coefficients). Added graphs from 2008 paper for 'reasonable model' * PTF_formula now takes lists. Alphas are the start of every origin bucket. Gammas/iotas are the endpoints of each linear piece. Updated test_barnzehn accordingly * updated docs to reflect changes to PTF_formula * More checks for how BZ parameters are passed. Moved PTF_formula to within BZ estimator (called during fit). Changed how gamma is passed: index from 0 and increment grain-agnostic. Docs not updated yet * updated user_guide/development to reflect PTF_formula integration into BZ * moved PTF_formula into PatsyFormula * fix typo * Revert "fix typo" This reverts commit df7d310. * Revert "moved PTF_formula into PatsyFormula" This reverts commit 840bc4f. * fix typo and clarify formulation when using patsy directly --------- Co-authored-by: danielfong-act <danielfong247@gmial.com>
1 parent ad4d081 commit 238db22

6 files changed

Lines changed: 5165 additions & 3160 deletions

File tree

chainladder/development/barnzehn.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@
99
from chainladder.development.glm import TweedieGLM
1010
from sklearn.linear_model import LinearRegression
1111
from sklearn.pipeline import Pipeline
12-
import warnings
13-
from chainladder.utils.utility_functions import PatsyFormula
14-
from patsy import ModelDesc
15-
12+
from chainladder.utils.utility_functions import PatsyFormula, PTF_formula
1613

1714
class BarnettZehnwirth(TweedieGLM):
1815
""" This estimator enables modeling from the Probabilistic Trend Family as
@@ -31,21 +28,38 @@ class BarnettZehnwirth(TweedieGLM):
3128
response: str
3229
Column name for the response variable of the GLM. If omitted, then the
3330
first column of the Triangle will be used.
34-
31+
alpha: list of int
32+
List of origin periods denoting the first indices of each group
33+
gamma: list of int
34+
iota: list of int
3535
3636
"""
3737

38-
def __init__(self, drop=None,drop_valuation=None,formula='C(origin) + development', response=None):
38+
def __init__(self, drop=None,drop_valuation=None,formula=None, response=None, alpha=None, gamma=None, iota=None):
    """Store the estimator settings and validate the model specification.

    The model is described either by a patsy ``formula`` string or by some
    combination of the ``alpha``, ``gamma`` and ``iota`` period lists, but
    never by both at once.

    Raises
    ------
    ValueError
        If both a formula and any of alpha/gamma/iota are supplied, if none
        of them is supplied, or if a supplied alpha/gamma/iota is not a list
        of integers.
    """
    # Function-scope import so the module-level import block stays untouched.
    from numbers import Integral

    self.drop = drop
    self.drop_valuation = drop_valuation
    self.response = response

    if formula and (alpha or gamma or iota):
        raise ValueError("Model can only be specified by either a formula or some combination of alpha, gamma and iota.")
    if not (formula or alpha or gamma or iota):
        raise ValueError("Model must be specified, either a formula or some combination of alpha, gamma and iota.")

    for bounds in (alpha, gamma, iota):
        if not bounds:
            # Omitted (or empty) parameters need no validation.
            continue
        # isinstance instead of an exact type() match, so list subclasses and
        # NumPy integer scalars are accepted; bool stays rejected as before.
        is_int_list = isinstance(bounds, list) and all(
            isinstance(bound, Integral) and not isinstance(bound, bool)
            for bound in bounds
        )
        if not is_int_list:
            raise ValueError("Alpha, gamma and iota must be given as lists of integers, specifying periods.")

    self.formula = formula
    self.alpha = alpha
    self.gamma = gamma
    self.iota = iota
55+
4456
def fit(self, X, y=None, sample_weight=None):
4557
if max(X.shape[:2]) > 1:
4658
raise ValueError("Only single index/column triangles are supported")
4759
tri = X.cum_to_incr().log()
4860
response = X.columns[0] if not self.response else self.response
61+
if(not self.formula):
62+
self.formula = PTF_formula(self.alpha,self.gamma,self.iota,dgrain=min(tri.development))
4963
self.model_ = DevelopmentML(Pipeline(steps=[
5064
('design_matrix', PatsyFormula(self.formula)),
5165
('model', LinearRegression(fit_intercept=False))]),

chainladder/development/tests/test_barnzehn.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import numpy as np
22
import chainladder as cl
33
import pytest
4-
from chainladder.utils.utility_functions import PTF_formula
54
abc = cl.load_sample('abc')
65

76
def test_basic_bz():
@@ -40,13 +39,11 @@ def test_bz_2008():
4039
exposure=np.array([[2.2], [2.4], [2.2], [2.0], [1.9], [1.6], [1.6], [1.8], [2.2], [2.5], [2.6]])
4140
abc_adj = abc/exposure
4241

43-
origin_buckets = [(0,1),(2,2),(3,4),(5,10)]
44-
dev_buckets = [(24,36),(36,48),(48,84),(84,108),(108,144)]
45-
val_buckets = [(1,8),(8,9),(9,12)]
46-
47-
abc_formula = PTF_formula(abc_adj,alpha=origin_buckets,gamma=dev_buckets,iota=val_buckets)
48-
49-
model=cl.BarnettZehnwirth(formula=abc_formula, drop=('1982',72)).fit(abc_adj)
42+
origin_buckets = [0,2,3,5]
43+
dev_buckets = [0,1,2,5,7,10]
44+
val_buckets = [0,7,8,11]
45+
46+
model=cl.BarnettZehnwirth(drop=('1982',72),alpha=origin_buckets,gamma=dev_buckets,iota=val_buckets).fit(abc_adj)
5047
assert np.all(
5148
np.around(model.coef_.values,4).flatten()
5249
== np.array([11.1579,0.1989,0.0703,0.0919,0.1871,-0.3771,-0.4465,-0.3727,-0.3154,0.0432,0.0858,0.1464])

chainladder/utils/utility_functions.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -769,27 +769,30 @@ def model_diagnostics(model, name=None, groupby=None):
769769
return concat(triangles, 0)
770770

771771

772-
def PTF_formula(tri: Triangle, alpha: ArrayLike = None, gamma: ArrayLike = None, iota: ArrayLike = None):
772+
def PTF_formula(alpha: list = None, gamma: list = None, iota: list = None,dgrain: int = 12):
    """ Helper formula that builds a patsy formula string for the BarnettZehnwirth
    estimator. Each axis's parameters can be grouped together. Groups of origin
    parameters (alpha) are set equal, and are specified by the first period in each bin.
    Groups of development (gamma) and valuation (iota) parameters are fit to
    separate linear trends, specified as a list denoting the endpoints of the linear pieces.
    In other words, development and valuation trends are fit to a piecewise linear model.

    Parameters
    ----------
    alpha: list of int
        First origin index of each origin group. Entries equal to 0 are
        dropped because the intercept term takes the place of the first alpha.
    gamma: list of int
        Endpoints of the linear development pieces, indexed from 0. Each
        endpoint ``g`` is converted to ``(g + 1) * dgrain`` before use.
    iota: list of int
        Endpoints of the linear valuation pieces, used as given.
    dgrain: int
        Multiplier applied to the gamma endpoints and divisor of each
        development term.

    Returns
    -------
    str
        The formula terms joined by ``+``, or an empty string when no term
        is produced.
    """
    terms = []
    if alpha:
        # The intercept term takes the place of the first alpha, so every
        # zero entry is filtered out (without mutating the caller's list).
        terms.extend(f'I({a} <= origin)' for a in alpha if a != 0)
    if gamma:
        # preprocess gamma to align with grain
        endpoints = [(g + 1) * dgrain for g in gamma]
        terms.extend(
            f'I((np.minimum({hi},development) - np.minimum({lo},development))/{dgrain})'
            for lo, hi in zip(endpoints, endpoints[1:])
        )
    if iota:
        terms.extend(
            f'I(np.minimum({hi},valuation) - np.minimum({lo},valuation))'
            for lo, hi in zip(iota, iota[1:])
        )
    # Joining a flat list of terms avoids empty parts and stray '+' signs.
    return '+'.join(terms)
127 KB
Loading

docs/library/references.bib

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,11 @@ @article{shapland2016
8787
year = {2016},
8888
url = {https://live-casact.pantheonsite.io/sites/default/files/2021-02/04-shapland.pdf}
8989
}
90+
91+
@article{barnett2008,
92+
author = {Barnett, G. and Zehnwirth, B.},
93+
title = {Modeling with the {M}ultivariate {P}robabilistic {T}rend {F}amily},
94+
journal = {Casualty Actuarial Society E-Forum},
95+
year = {2008},
96+
url = {https://www.casact.org/sites/default/files/database/forum_08fforum_3barnett_zehnwirth.pdf}
97+
}

0 commit comments

Comments
 (0)