From 8279e372c46cbed6d1dd677c34cf0daa4342f147 Mon Sep 17 00:00:00 2001 From: Colladito Date: Sat, 11 Oct 2025 17:44:09 +0200 Subject: [PATCH 01/13] [VCF] Change optimizer method from trust-constr to L-BFGS-B --- bluemath_tk/distributions/nonstat_gev.py | 191 ++++++++++++++--------- 1 file changed, 116 insertions(+), 75 deletions(-) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index d39238f..b682e2a 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -816,6 +816,7 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: if self.AIC_iter[iter] <= self.AIC_iter[iter - 1]: # Update the parameters + final_fit_result = fit_result self.AICini = self.AIC_iter[iter] self._update_params(**fit_result) beta_cov = fit_result.get("beta_cov") @@ -841,6 +842,7 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: gamma_cov = gamma_cov[:-1] nind_sh -= 1 + fit_result = final_fit_result self.niter_cov = iter - self.niter_harm self.nit = iter @@ -1281,12 +1283,15 @@ def _fit( """ # Fitting options if options is None: - options = { - "gtol": 1e-8, - "xtol": 1e-8, - "barrier_tol": 1e-6, - "maxiter": 1000, - } + options = dict( + maxiter=20000, + # Aim for first-order optimality: + gtol=1e-8, # or 1e-9 if your scaling is good + # Make f-decrease test essentially inactive: + ftol=1e-20, # very small so it won’t trigger early + maxcor=20, # more curvature memory + maxls=100 # more line-search steps + ) # Total number of parameters to be estimated nmu = 2 * nmu @@ -1430,6 +1435,30 @@ def _fit( + ngamma ] = 0.15 + if ntrend_sh > 0: + lb[ + 2 + + self.ngamma0 + + nmu + + npsi + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + ngamma + ] = -0.15 + ub[ + 2 + + self.ngamma0 + + nmu + + npsi + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + ngamma + ] = 0.15 + if nind_sh > 0: lb[ 2 @@ -1440,7 +1469,8 @@ def _fit( + nind_loc + ntrend_sc + nind_sc - + ngamma : 2 + + ngamma + + ntrend_sh : 2 + self.ngamma0 + nmu + npsi @@ -1449,6 +1479,7 @@ def _fit( + ntrend_sc + nind_sc + ngamma + + ntrend_sh + nind_sh ] = -0.15 ub[ @@ -1460,7 +1491,8 @@ def _fit( + nind_loc + ntrend_sc + nind_sc - + ngamma : 2 + + ngamma + + ntrend_sh : 2 + self.ngamma0 + nmu + npsi @@ -1469,6 +1501,7 @@ def _fit( + ntrend_sc + nind_sc + ngamma + + ntrend_sh + nind_sh ] = 0.15 @@ -1484,90 +1517,94 @@ def _fit( bounds = [(lb_i, up_i) for lb_i, up_i in zip(lb, ub)] result = minimize( fun=self._auxmin_loglikelihood, - jac=self._auxmin_loglikelihood_grad, # Gradient information - hess=self._auxmin_loglikelihood_hess, # Hessian information, if applicable x0=x_ini, bounds=bounds, args=( nmu, npsi, - ngamma, + ngamma, ntrend_loc, list_loc, - ntrend_sc, + ntrend_sc, list_sc, ntrend_sh, list_sh, ), - options=options, # Options - method="trust-constr", + method='L-BFGS-B', + jac=self._auxmin_loglikelihood_grad, + options={ + 'maxiter': options.get('maxiter', 1000), + 'ftol': options.get('ftol', 1e-6), + 'gtol': options.get('gtol', 1e-6), + } ) fit_result["x"] = result.x # Optimal parameters vector - fit_result["negloglikelihood"] = result.fun # Optimal loglikelihood + fit_result["negloglikelihood"] = result.fun # Optimal loglikelihood + fit_result["AIC"] = self._AIC(-fit_result["negloglikelihood"], n_params) + fit_result["n_params"] = n_params fit_result["success"] = result.success fit_result["message"] = result.message - fit_result["grad"] = result.grad - fit_result["hess_inv"] = ( - result.hess_inv if 
"hess_inv" in result else None - ) # 'hess_inv' is only available if 'hess' is provided - - # Check if any of the bounds related to shape parameters become active, if active increase or decrease the bound and call the optimization routine again - lambdas = result.v - auxlb = [] - auxub = [] - for i, v in enumerate(lambdas[0]): - if np.abs(fit_result["x"][i] - lb[i]) <= 1e-6 or v < -1e-6: - lb[i] -= 0.05 - auxlb.append(i) - if np.abs(fit_result["x"][i] - ub[i]) <= 1e-6 or v > 1e-6: - ub[i] += 0.05 - auxub.append(i) - - it = 0 - while (len(auxlb) > 0 or len(auxub) > 0) and it < 10: - it += 1 - result = minimize( - fun=self._auxmin_loglikelihood, - jac=self._auxmin_loglikelihood_grad, # Gradient information - hess=self._auxmin_loglikelihood_hess, # Hessian information, if applicable - x0=fit_result["x"], - bounds=bounds, - args=( - nmu, - npsi, - ngamma, - ntrend_loc, - list_loc, - ntrend_sc, - list_sc, - ntrend_sh, - list_sh, - ), - options=options, # Options - method="trust-constr", - ) + fit_result["grad"] = result.grad if hasattr(result, 'grad') else None + fit_result["jac"] = result.jac if hasattr(result, 'jac') else None + fit_result["hess_inv"] = result.hess_inv if hasattr(result, 'hess_inv') else None + + # # Check if any of the bounds related to shape parameters become active, if active increase or decrease the bound and call the optimization routine again + # lambdas = result.v + # auxlb = [] + # auxub = [] + # for i, v in enumerate(lambdas[0]): + # if np.abs(fit_result["x"][i] - lb[i]) <= 1e-6 or v < -1e-6: + # lb[i] -= 0.05 + # auxlb.append(i) + # if np.abs(fit_result["x"][i] - ub[i]) <= 1e-6 or v > 1e-6: + # ub[i] += 0.05 + # auxub.append(i) + + # it = 0 + # while (len(auxlb) > 0 or len(auxub) > 0) and it < 10: + # it += 1 + # result = minimize( + # fun=self._auxmin_loglikelihood, + # jac=self._auxmin_loglikelihood_grad, # Gradient information + # hess=self._auxmin_loglikelihood_hess, # Hessian information, if applicable + # x0=fit_result["x"], + # bounds=bounds, + # args=( + # nmu, + # npsi, + # ngamma, + # ntrend_loc, + # list_loc, + # ntrend_sc, + # list_sc, + # ntrend_sh, + # list_sh, + # ), + # options=options, # Options + # method="trust-constr", + # ) - fit_result["x"] = result.x # Optimal parameters vector - fit_result["negloglikelihood"] = result.fun # Optimal loglikelihood - fit_result["AIC"] = self._AIC(-fit_result["negloglikelihood"], n_params) - fit_result["n_params"] = n_params - fit_result["success"] = result.success - fit_result["message"] = result.message - fit_result["grad"] = result.grad - fit_result["hess_inv"] = ( - result.hess_inv if "hess_inv" in result else None - ) # 'hess_inv' is only available if 'hess' is provided - fit_result["lambdas"] = result.v - auxlb = [] - auxub = [] - for i, v in enumerate(lambdas[0]): - if np.abs(fit_result["x"][i] - lb[i]) <= 1e-6 or v < -1e-6: - lb[i] -= 0.05 - auxlb.append(i) - if np.abs(fit_result["x"][i] - ub[i]) <= 1e-6 or v > 1e-6: - ub[i] += 0.05 - auxub.append(i) + # fit_result["x"] = result.x # Optimal parameters vector + # fit_result["negloglikelihood"] = result.fun # Optimal loglikelihood + # fit_result["AIC"] = self._AIC(-fit_result["negloglikelihood"], n_params) + # fit_result["n_params"] = n_params + # fit_result["success"] = result.success + # fit_result["message"] = result.message + # fit_result["grad"] = result.grad + # fit_result["hess_inv"] = ( + # result.hess_inv if "hess_inv" in result else None + # ) # 'hess_inv' is only available if 'hess' is provided + # fit_result["lambdas"] = result.v + # auxlb = [] 
+ # auxub = [] + # for i, v in enumerate(lambdas[0]): + # if np.abs(fit_result["x"][i] - lb[i]) <= 1e-6 or v < -1e-6: + # lb[i] -= 0.05 + # auxlb.append(i) + # if np.abs(fit_result["x"][i] - ub[i]) <= 1e-6 or v > 1e-6: + # ub[i] += 0.05 + # auxub.append(i) # Location parameter fit_result["beta0"] = fit_result["x"][0] @@ -4804,6 +4841,10 @@ def fit( fit_result["grad"] = Jx fit_result["hessian"] = Hxx + # if fit_result["hess_inv"] is not None: + # self.invI0 = fit_result["hess_inv"] + # else: + # self.invI0 = np.linalg.inv(-Hxx) self.invI0 = np.linalg.inv(-Hxx) fit_result["invI0"] = self.invI0 From c4d9bb652aa29e1f1073dbdb008a7afbf2cfa47b Mon Sep 17 00:00:00 2001 From: Colladito Date: Fri, 24 Oct 2025 13:13:41 +0200 Subject: [PATCH 02/13] [VCF] Add AIC after fitting --- bluemath_tk/distributions/nonstat_gev.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index 366bb62..d3056c0 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -1534,6 +1534,8 @@ def _fit( fit_result["x"] = result.x # Optimal parameters vector fit_result["negloglikelihood"] = result.fun # Optimal loglikelihood + fit_result["AIC"] = self._AIC(-fit_result["negloglikelihood"], n_params) + fit_result["n_params"] = n_params fit_result["success"] = result.success fit_result["message"] = result.message fit_result["grad"] = result.grad From d6551b05f9e20d53c142ef619143bcb7cd306de1 Mon Sep 17 00:00:00 2001 From: Colladito Date: Fri, 24 Oct 2025 13:20:35 +0200 Subject: [PATCH 03/13] [VCF] Small error fixed --- bluemath_tk/distributions/nonstat_gev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index d3056c0..2997df7 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -4838,7 +4838,7 @@ def fit( std_params = np.sqrt(np.diag(self.invI0)) self.std_params = std_params - fit_result["std_param"] = std_params + fit_result["std_params"] = std_params if plot: self.plot() From 8be2ce7b3ac7e68cb733294bb05ba342c43a962b Mon Sep 17 00:00:00 2001 From: Colladito Date: Fri, 24 Oct 2025 14:11:00 +0200 Subject: [PATCH 04/13] [VCF] Change background color of plot and reduce error --- bluemath_tk/distributions/nonstat_gev.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index b62b331..9ac20c7 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -1285,9 +1285,9 @@ def _fit( options = dict( maxiter=20000, # Aim for first-order optimality: - gtol=1e-8, # or 1e-9 if your scaling is good + gtol=1e-6, # or 1e-9 if your scaling is good # Make f-decrease test essentially inactive: - ftol=1e-20, # very small so it won’t trigger early + ftol=1e-8, # very small so it won’t trigger early maxcor=20, # more curvature memory maxls=100 # more line-search steps ) @@ -1592,10 +1592,9 @@ def _fit( fit_result["n_params"] = n_params fit_result["success"] = result.success fit_result["message"] = result.message - fit_result["grad"] = result.grad - fit_result["hess_inv"] = ( - result.hess_inv if "hess_inv" in result else None - ) # 'hess_inv' is only available if 'hess' is provided + fit_result["grad"] = result.grad if hasattr(result, 'grad') else None + fit_result["jac"] = result.jac if hasattr(result, 'jac') else 
None + fit_result["hess_inv"] = result.hess_inv if hasattr(result, 'hess_inv') else None # 'hess_inv' is only available if 'hess' is provided auxlb = [] auxub = [] @@ -6949,7 +6948,7 @@ def plot(self, return_plot: bool = False, save: bool = False, init_year: int = 0 ] ).T - cf = ax1.contourf(t_shifted[t_ord], hvar, sf, levels=50, cmap="viridis_r") + cf = ax1.contourf(t_shifted[t_ord], hvar, sf, levels=50, cmap="Wistia") cbar = fig.colorbar(cf, ax=ax1) cbar.set_label("Exceedance probability", fontsize=12) # =============== From 8e3faca1110408995befb310489439ca1b6a4d3d Mon Sep 17 00:00:00 2001 From: Colladito Date: Mon, 27 Oct 2025 10:11:29 +0100 Subject: [PATCH 05/13] [VCF] Add boolean variable in auto_adjust to make stationary the shape parameter --- bluemath_tk/distributions/nonstat_gev.py | 480 ++++++++++++----------- 1 file changed, 245 insertions(+), 235 deletions(-) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index 9ac20c7..d63aa92 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -164,7 +164,7 @@ def __init__( # Color palette self.colors = default_colors - def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: + def auto_adjust(self, max_iter: int = 1000, plot: bool = False, stationary_shape: bool=False) -> dict: """ This method automatically select and calculate the parameters which minimize the AIC related to Non-Stationary GEV distribution using the Maximum Likelihood method within an iterative scheme, @@ -177,6 +177,8 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: Number of iteration in optimization process. plot : bool, default=False If plot the adjusted distribution + stationary_shape : bool, default=False + If True, the shape parameter remain stationary Return ---------- @@ -344,98 +346,99 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: max_val = auxmax pos = 2 - # Perturbation for shape - auxmax = abs( - auxJx[ - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 - + 2 - ].T - @ auxI0[ - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 - + 2, - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 - + 2, - ] - @ auxJx[ - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_loc - + ntrend_sc - + nind_sc - + 2 * npsi - + 2 * ngamma - + 4 - + 2 - ] - ) - if auxmax > max_val: - max_val = auxmax - pos = 3 + if not stationary_shape: + # Perturbation for shape + auxmax = abs( + auxJx[ + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 + + 2 + ].T + @ auxI0[ + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + 
nind_sc + + 2 * npsi + + 2 * ngamma + + 4 : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 + + 2, + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 + + 2, + ] + @ auxJx[ + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_loc + + ntrend_sc + + nind_sc + + 2 * npsi + + 2 * ngamma + + 4 + + 2 + ] + ) + if auxmax > max_val: + max_val = auxmax + pos = 3 # If maximum perturbation corresponds to location, include a new harmonic if pos == 1: @@ -537,6 +540,7 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: self.AIC_iter[iter] = self._AIC(-fit_result["negloglikelihood"], n_params) ######### COVARIATES Iterative process ######### + final_fit_result = fit_result nrows, nind_cov = self.covariates.shape # Number of covariates # Auxiliar variables related to location parameter @@ -650,33 +654,11 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: ) ) maximo_sc, pos_sc = np.max(values2), np.argmax(values2) - # Perturbation of shape - values3 = np.abs( - auxJx[ - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_cov - + 2 * npsi - + ntrend_sc - + nind_cov - + 2 * ngamma - + ntrend_sh : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_cov - + 2 * npsi - + ntrend_sc - + nind_cov - + 2 * ngamma - + ntrend_sh - + nind_cov - ] - ** 2 - / np.diag( - auxI0[ + + if not stationary_shape: + # Perturbation of shape + values3 = np.abs( + auxJx[ 2 + self.ngamma0 + 2 * nmu @@ -685,26 +667,8 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: + 2 * npsi + ntrend_sc + nind_cov - + 2 * ngamma : 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_cov - + 2 * npsi - + ntrend_sc - + nind_cov + 2 * ngamma - + ntrend_sh - + nind_cov, - 2 - + self.ngamma0 - + 2 * nmu - + ntrend_loc - + nind_cov - + 2 * npsi - + ntrend_sc - + nind_cov - + 2 * ngamma : 2 + + ntrend_sh : 2 + self.ngamma0 + 2 * nmu + ntrend_loc @@ -714,14 +678,59 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: + nind_cov + 2 * ngamma + ntrend_sh - + nind_cov, + + nind_cov ] + ** 2 + / np.diag( + auxI0[ + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_cov + + 2 * npsi + + ntrend_sc + + nind_cov + + 2 * ngamma : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_cov + + 2 * npsi + + ntrend_sc + + nind_cov + + 2 * ngamma + + ntrend_sh + + nind_cov, + 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_cov + + 2 * npsi + + ntrend_sc + + nind_cov + + 2 * ngamma : 2 + + self.ngamma0 + + 2 * nmu + + ntrend_loc + + nind_cov + + 2 * npsi + + ntrend_sc + + nind_cov + + 2 * ngamma + + ntrend_sh + + nind_cov, + ] + ) ) - ) - maximo_sh, pos_sh = np.max(values3), np.argmax(values3) + maximo_sh, pos_sh = np.max(values3), np.argmax(values3) - # Select the maximum perturbation - posmaxparam = np.argmax([maximo_loc, maximo_sc, maximo_sh]) + + # Select the maximum perturbation + posmaxparam = np.argmax([maximo_loc, maximo_sc, maximo_sh]) + else: + posmaxparam = np.argmax([maximo_loc, maximo_sc]) # Initialize auxiliar covariates variables if beta_cov.size > 0: @@ -1024,102 +1033,103 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False) -> dict: self.ntrend_sc 
= 0 # Shape trends - ntrend_sh = 1 + if not stationary_shape: + ntrend_sh = 1 - concatvalues = [ - fit_result["x"][0 : 1 + 2 * nmu], - np.zeros(self.ntrend_loc), - fit_result["x"][ - 1 + 2 * nmu : 1 + 2 * nmu + nind_loc - ], # Location initial parameter beta0, beta, betaT, beta_cov - fit_result["x"][ - 1 + 2 * nmu + nind_loc : 2 + 2 * nmu + nind_loc + 2 * npsi - ], - np.zeros(self.ntrend_sc), - fit_result["x"][ - 2 + 2 * nmu + nind_loc + 2 * npsi : 2 - + 2 * nmu - + nind_loc - + 2 * npsi - + nind_sc - ], # Scale initial parameter alpha0, alpha, alphaT, alpha_cov - fit_result["x"][2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc] - * np.ones(self.ngamma0), - fit_result["x"][ - 2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc + self.ngamma0 : 2 - + 2 * nmu - + nind_loc - + 2 * npsi - + nind_sc - + self.ngamma0 - + 2 * ngamma + concatvalues = [ + fit_result["x"][0 : 1 + 2 * nmu], + np.zeros(self.ntrend_loc), + fit_result["x"][ + 1 + 2 * nmu : 1 + 2 * nmu + nind_loc + ], # Location initial parameter beta0, beta, betaT, beta_cov + fit_result["x"][ + 1 + 2 * nmu + nind_loc : 2 + 2 * nmu + nind_loc + 2 * npsi + ], + np.zeros(self.ntrend_sc), + fit_result["x"][ + 2 + 2 * nmu + nind_loc + 2 * npsi : 2 + + 2 * nmu + + nind_loc + + 2 * npsi + + nind_sc + ], # Scale initial parameter alpha0, alpha, alphaT, alpha_cov + fit_result["x"][2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc] + * np.ones(self.ngamma0), + fit_result["x"][ + 2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc + self.ngamma0 : 2 + + 2 * nmu + + nind_loc + + 2 * npsi + + nind_sc + + self.ngamma0 + + 2 * ngamma + ] + * np.ones(2 * ngamma), + 0.01 * np.ones(ntrend_sh), + fit_result["x"][ + 2 + + 2 * nmu + + nind_loc + + 2 * npsi + + nind_sc + + self.ngamma0 + + 2 * ngamma : 2 + + 2 * nmu + + nind_loc + + 2 * npsi + + nind_sc + + self.ngamma0 + + 2 * ngamma + + nind_sh + ] + * np.ones( + nind_sh + ), # Shape initial parameter gamma0, gamma, gammaT, gamma_cov ] - * np.ones(2 * ngamma), - 0.01 * np.ones(ntrend_sh), - fit_result["x"][ + xini = np.concatenate( + [np.asarray(v) for v in concatvalues if v is not None] + ) + fit_result = self._fit( + self.nmu, + self.npsi, + self.ngamma, + self.list_loc, + self.ntrend_loc, + self.list_sc, + self.ntrend_sc, + self.list_sh, + ntrend_sh, + xini, + ) + + n_params = ( 2 - + 2 * nmu - + nind_loc - + 2 * npsi - + nind_sc - + self.ngamma0 - + 2 * ngamma : 2 - + 2 * nmu - + nind_loc - + 2 * npsi - + nind_sc + self.ngamma0 - + 2 * ngamma - + nind_sh - ] - * np.ones( - nind_sh - ), # Shape initial parameter gamma0, gamma, gammaT, gamma_cov - ] - xini = np.concatenate( - [np.asarray(v) for v in concatvalues if v is not None] - ) - fit_result = self._fit( - self.nmu, - self.npsi, - self.ngamma, - self.list_loc, - self.ntrend_loc, - self.list_sc, - self.ntrend_sc, - self.list_sh, - ntrend_sh, - xini, - ) - - n_params = ( - 2 - + self.ngamma0 - + 2 * self.nmu - + 2 * self.npsi - + 2 * self.ngamma - + self.ntrend_loc - + self.nind_loc - + ntrend_sc - + self.nind_sc - + ntrend_sh - + self.nind_sh - ) - self.AIC_iter[self.nit + 3] = self._AIC( - -fit_result["negloglikelihood"], n_params - ) - self.loglike_iter[self.nit + 3] = -fit_result["negloglikelihood"] + + 2 * self.nmu + + 2 * self.npsi + + 2 * self.ngamma + + self.ntrend_loc + + self.nind_loc + + ntrend_sc + + self.nind_sc + + ntrend_sh + + self.nind_sh + ) + self.AIC_iter[self.nit + 3] = self._AIC( + -fit_result["negloglikelihood"], n_params + ) + self.loglike_iter[self.nit + 3] = -fit_result["negloglikelihood"] - if self.AIC_iter[self.nit + 3] < self.AIC_iter[self.nit]: - 
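                # Keep the shape trend only if it lowers the AIC of the best
                # model so far: with AIC = 2*k - 2*ln(L), the extra trend
                # parameter must raise the log-likelihood by more than one
                # unit to be worth keeping.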
self.AICini = self.AIC_iter[self.nit + 3] - print("Shape trend is significative") - print("Shape trend AIC: ", self.AICini) - # Update the parameters - self._update_params(**fit_result) - self.ntrend_sh = ntrend_sh - else: - print("Shape trend is NOT significative") - self.ntrend_sh = 0 + if self.AIC_iter[self.nit + 3] < self.AIC_iter[self.nit]: + self.AICini = self.AIC_iter[self.nit + 3] + print("Shape trend is significative") + print("Shape trend AIC: ", self.AICini) + # Update the parameters + self._update_params(**fit_result) + self.ntrend_sh = ntrend_sh + else: + print("Shape trend is NOT significative") + self.ntrend_sh = 0 # Final parameters values concatvalues = [ From 27dc9345e775e7e09e386a080dae07ffa68b697e Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 30 Oct 2025 10:31:36 +0100 Subject: [PATCH 06/13] [VCF] Solve error when model is Gumbel in NonStatGEV --- bluemath_tk/distributions/nonstat_gev.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bluemath_tk/distributions/nonstat_gev.py b/bluemath_tk/distributions/nonstat_gev.py index d63aa92..dfe3bda 100644 --- a/bluemath_tk/distributions/nonstat_gev.py +++ b/bluemath_tk/distributions/nonstat_gev.py @@ -1131,6 +1131,7 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False, stationary_shape print("Shape trend is NOT significative") self.ntrend_sh = 0 + aux_gamma0 = fit_result["x"][2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc] if self.ngamma0 == 1 else np.empty(1) # Final parameters values concatvalues = [ fit_result["x"][0 : 1 + 2 * nmu], @@ -1147,8 +1148,7 @@ def auto_adjust(self, max_iter: int = 1000, plot: bool = False, stationary_shape + 2 * npsi + nind_sc ], # Scale initial parameter alpha0, alpha, alphaT, alpha_cov - fit_result["x"][2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc] - * np.ones(self.ngamma0), + aux_gamma0 * np.ones(self.ngamma0), fit_result["x"][ 2 + 2 * nmu + nind_loc + 2 * npsi + nind_sc + self.ngamma0 : 2 + 2 * nmu From 7c125e9b0bb4753a6ea839e245ed3646dd54f6d5 Mon Sep 17 00:00:00 2001 From: Colladito Date: Tue, 4 Nov 2025 13:10:25 +0100 Subject: [PATCH 07/13] [VCF] Changes in GPD --- bluemath_tk/distributions/gpd.py | 276 +++++++++++++------------------ 1 file changed, 115 insertions(+), 161 deletions(-) diff --git a/bluemath_tk/distributions/gpd.py b/bluemath_tk/distributions/gpd.py index 288d303..c74a968 100644 --- a/bluemath_tk/distributions/gpd.py +++ b/bluemath_tk/distributions/gpd.py @@ -1,6 +1,7 @@ from typing import Dict, List import numpy as np +from scipy.stats import genpareto from ._base_distributions import BaseDistribution, FitResult, fit_dist @@ -22,29 +23,29 @@ class GPD(BaseDistribution): Methods ------- - pdf(x, loc, scale, shape) + pdf(x, threshold, scale, shape) Probability density function. - cdf(x, loc, scale, shape) + cdf(x, threshold, scale, shape) Cumulative distribution function - qf(p, loc, scale, shape) + qf(p, threshold, scale, shape) Quantile function - sf(x, loc, scale, shape) + sf(x, threshold, scale, shape) Survival function - nll(data, loc, scale, shape) + nll(data, threshold, scale, shape) Negative Log-Likelihood function fit(data) Fit distribution to data (NOT IMPLEMENTED). - random(size, loc, scale, shape) + random(size, threshold, scale, shape) Generates random values from GPD distribution. - mean(loc, scale, shape) + mean(threshold, scale, shape) Mean of GPD distribution. - median(loc, scale, shape) + median(threshold, scale, shape) Median of GPD distribution. 
- variance(loc, scale, shape) + variance(threshold, scale, shape) Variance of GPD distribution. - std(loc, scale, shape) + std(threshold, scale, shape) Standard deviation of GPD distribution. - stats(loc, scale, shape) + stats(threshold, scale, shape) Summary statistics of GPD distribution. Notes @@ -54,9 +55,9 @@ class GPD(BaseDistribution): Examples -------- >>> from bluemath_tk.distributions.gpd import GPD - >>> gpd_pdf = GPD.pdf(x, loc=0, scale=1, shape=0.1) - >>> gpd_cdf = GPD.cdf(x, loc=0, scale=1, shape=0.1) - >>> gpd_qf = GPD.qf(p, loc=0, scale=1, shape=0.1) + >>> gpd_pdf = GPD.pdf(x, threshold=0, scale=1, shape=0.1) + >>> gpd_cdf = GPD.cdf(x, threshold=0, scale=1, shape=0.1) + >>> gpd_qf = GPD.qf(p, threshold=0, scale=1, shape=0.1) """ def __init__(self) -> None: @@ -81,11 +82,11 @@ def param_names() -> List[str]: """ Name of parameters of GPD """ - return ["loc", "scale", "shape"] + return ["threshold", "scale", "shape"] @staticmethod def pdf( - x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + x: np.ndarray, threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> np.ndarray: """ Probability density function @@ -94,8 +95,8 @@ def pdf( ---------- x : np.ndarray Values to compute the probability density value - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -115,25 +116,12 @@ def pdf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - - y = np.maximum(x - loc, 0) / scale - - # Gumbel case (shape = 0) - if shape == 0.0: - pdf = (1 / scale) * (np.exp(-y)) - - # General case (Weibull and Frechet, shape != 0) - else: - pdf = np.full_like(x, 0, dtype=float) - yy = 1 + shape * y - yymask = yy > 0 - pdf[yymask] = (1 / scale) * (yy[yymask] ** (-1 - (1 / shape))) - - return pdf + + return genpareto.pdf(x, c=shape, loc=threshold, scale=scale) @staticmethod def cdf( - x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + x: np.ndarray, threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> np.ndarray: """ Cumulative distribution function @@ -142,8 +130,8 @@ def cdf( ---------- x : np.ndarray Values to compute their probability - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -164,21 +152,11 @@ def cdf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - y = np.maximum(x - loc, 0) / scale - - # Gumbel case (shape = 0) - if shape == 0.0: - p = 1 - np.exp(-y) - - # General case (Weibull and Frechet, shape != 0) - else: - p = 1 - np.maximum(1 + shape * y, 0) ** (-1 / shape) - - return p + return genpareto.cdf(x, c=shape, loc=threshold, scale=scale) @staticmethod def sf( - x: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + x: np.ndarray, threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> np.ndarray: """ Survival function (1-Cumulative Distribution Function) @@ -187,8 +165,8 @@ def sf( ---------- x : np.ndarray Values to compute their survival function value - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. 
@@ -209,13 +187,11 @@ def sf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - sp = 1 - GPD.cdf(x, loc=loc, scale=scale, shape=shape) - - return sp + return genpareto.sf(x, c=shape, loc=threshold, scale=scale) @staticmethod def qf( - p: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + p: np.ndarray, threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> np.ndarray: """ Quantile function (Inverse of Cumulative Distribution Function) @@ -224,8 +200,8 @@ def qf( ---------- p : np.ndarray Probabilities to compute their quantile - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -246,25 +222,17 @@ def qf( If scale is not greater than 0. """ - if np.min(p) <= 0 or np.max(p) >= 1: + if np.min(p) < 0 or np.max(p) > 1: raise ValueError("Probabilities must be in the range (0, 1)") if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Gumbel case (shape = 0) - if shape == 0.0: - q = loc - scale * np.log(1 - p) - - # General case (Weibull and Frechet, shape != 0) - else: - q = loc + scale * ((1 - p) ** (-shape) - 1) / shape - - return q + return genpareto.ppf(p, c=shape, loc=threshold, scale=scale) @staticmethod def nll( - data: np.ndarray, loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + data: np.ndarray, threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> float: """ Negative Log-Likelihood function @@ -273,8 +241,8 @@ def nll( ---------- data : np.ndarray Data to compute the Negative Log-Likelihood value - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -286,34 +254,15 @@ def nll( nll : float Negative Log-Likelihood value """ - if scale <= 0: - nll = np.inf # Return a large value for invalid scale - - else: - y = (data - loc) / scale - - # # Gumbel case (shape = 0) - # if shape == 0.0: - # nll = data.shape[0] * np.log(scale) + np.sum(y) - - # General case (Weibull and Frechet, shape != 0) - # else: - shape = ( - np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8) - ) # Avoid division by zero - y = 1 + shape * y - if np.min(y <= 0): - nll = np.inf # Return a large value for invalid y - else: - nll = data.shape[0] * np.log(scale) + (1 / shape + 1) * np.sum( - np.log(y) - ) - - return nll + return np.inf + if np.any(data < threshold): + return np.inf + + return -np.sum(genpareto.logpdf(data, c=shape, loc=threshold, scale=scale), axis=0) @staticmethod - def fit(data: np.ndarray, **kwargs) -> FitResult: + def fit(data: np.ndarray, threshold: float, **kwargs) -> FitResult: """ Fit GEV distribution @@ -321,6 +270,8 @@ def fit(data: np.ndarray, **kwargs) -> FitResult: ---------- data : np.ndarray Data to fit the GEV distribution + threshold : float, default=0.0 + Threshold parameter **kwargs : dict, optional Additional keyword arguments for the fitting function. These can include options like method, bounds, etc. @@ -333,13 +284,53 @@ def fit(data: np.ndarray, **kwargs) -> FitResult: Result of the fit containing the parameters loc, scale, shape, success status, and negative log-likelihood value. 
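
        Examples
        --------
        A minimal sketch of the intended call (illustrative values, not a
        reproducible result):

        >>> from bluemath_tk.distributions.gpd import GPD
        >>> data = GPD.random(1000, threshold=0.5, scale=1.5, shape=0.2, random_state=42)
        >>> result = GPD.fit(data, threshold=0.5)
        >>> result.params  # [threshold, scale, shape]; threshold is held fixed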
""" - # Fit the GEV distribution to the data using the fit_dist function - return fit_dist(GPD, data, **kwargs) - + exceedances = data[data > threshold] + + if len(exceedances) == 0: + raise ValueError("No exceedances above threshold") + + # Adjust exceedances relative to threshold + exceedances_adjusted = exceedances - threshold + + # Default optimization settings + default_x0 = [np.std(exceedances_adjusted), 0.1] + x0 = kwargs.get("x0", default_x0) + method = kwargs.get("method", "Nelder-Mead") + default_bounds = [(1e-6, None), (None, None)] + bounds = kwargs.get("bounds", default_bounds) + options = kwargs.get("options", {"disp": False}) + + # Objective function that includes threshold + def obj(params): + scale, shape = params + return GPD.nll(exceedances, threshold=threshold, scale=scale, shape=shape) + + # Perform optimization + from scipy.optimize import minimize, OptimizeResult + result = minimize( + fun=obj, + x0=x0, + method=method, + bounds=bounds, + options=options + ) + + # Create modified result + modified_result = OptimizeResult( + x=[threshold, result.x[0], result.x[1]], + success=result.success, + fun=result.fun, + message=result.message, + hess_inv=result.hess_inv if hasattr(result, "hess_inv") else None, + ) + + # Return the fitting result + return FitResult(GPD, exceedances, modified_result) + @staticmethod def random( size: int, - loc: float = 0.0, + threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0, random_state: int = None, @@ -351,8 +342,8 @@ def random( ---------- size : int Number of random values to generate - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -376,32 +367,17 @@ def random( if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Set random state if provided - if random_state is not None: - np.random.seed(random_state) - - # Generate uniform random numbers - u = np.random.uniform(0, 1, size) - - # Gumbel case (shape = 0) - if shape == 0.0: - x = loc - scale * np.log(u) - - # General case (Weibull and Frechet, shape != 0) - else: - x = loc + scale * (u ** (-shape) - 1) / shape - - return x + return genpareto.rvs(c=shape, loc=threshold, scale=scale, size=size, random_state=random_state) @staticmethod - def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + def mean(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: """ Mean Parameters ---------- - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -426,25 +402,17 @@ def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - if shape >= 1: - Warning("Shape parameter must be < 1 for mean to be defined") - mean = np.inf - - # Shape < 1 case - else: - mean = scale / (1 - shape) - - return mean + return genpareto.mean(c=shape, loc=threshold, scale=scale) @staticmethod - def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + def median(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: """ Median Parameters ---------- - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. 
@@ -465,22 +433,17 @@ def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - if shape == 0: - median = np.inf - else: - median = loc + scale * (2**shape - 1) / shape - - return median + return genpareto.median(c=shape, loc=threshold, scale=scale) @staticmethod - def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + def variance(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: """ Variance Parameters ---------- - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -504,25 +467,17 @@ def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Gumbel case (shape = 0) - if shape >= 1 / 2: - Warning("Shape parameter must be < 1/2 for variance to be defined") - var = np.inf - - else: - var = scale**2 / ((1 - shape) ** 2 * (1 - 2 * shape)) - - return var + return genpareto.var(c=shape, loc=threshold, scale=scale) @staticmethod - def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + def std(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: """ Standard deviation Parameters ---------- - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. @@ -544,13 +499,12 @@ def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - std = np.sqrt(GPD.variance(loc, scale, shape)) + return genpareto.std(c=shape, loc=threshold, scale=scale) - return std @staticmethod def stats( - loc: float = 0.0, scale: float = 1.0, shape: float = 0.0 + threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 ) -> Dict[str, float]: """ Summary statistics @@ -559,8 +513,8 @@ def stats( Parameters ---------- - loc : float, default=0.0 - Location parameter + threshold : float, default=0.0 + Threshold parameter scale : float, default = 1.0 Scale parameter. Must be greater than 0. 
@@ -583,10 +537,10 @@ def stats( raise ValueError("Scale parameter must be > 0") stats = { - "mean": float(GPD.mean(loc, scale, shape)), - "median": float(GPD.median(loc, scale, shape)), - "variance": float(GPD.variance(loc, scale, shape)), - "std": float(GPD.std(loc, scale, shape)), + "mean": float(GPD.mean(threshold, scale, shape)), + "median": float(GPD.median(threshold, scale, shape)), + "variance": float(GPD.variance(threshold, scale, shape)), + "std": float(GPD.std(threshold, scale, shape)), } return stats From 332b0261899cece63ea9ed39c8a465188aa11b82 Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 10:04:50 +0100 Subject: [PATCH 08/13] [VCF] Updated GPD parameters --- bluemath_tk/distributions/extreme_correction.py | 2 +- bluemath_tk/distributions/pot.py | 2 +- bluemath_tk/distributions/utils/extr_corr_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bluemath_tk/distributions/extreme_correction.py b/bluemath_tk/distributions/extreme_correction.py index b6fcbfb..e20cf2e 100644 --- a/bluemath_tk/distributions/extreme_correction.py +++ b/bluemath_tk/distributions/extreme_correction.py @@ -221,7 +221,7 @@ def fit( self.threshold, self.pot_data, pot_idx = opt_threshold.fit() self.poiss_parameter = self.pot_data.size / self.am_data.size - fit_result = GPD.fit(self.pot_data, f0=self.threshold) + fit_result = GPD.fit(self.pot_data, threshold=self.threshold) # If Annual Maxima used in fitting step if self.method == "am": diff --git a/bluemath_tk/distributions/pot.py b/bluemath_tk/distributions/pot.py index 6f98058..d96092d 100644 --- a/bluemath_tk/distributions/pot.py +++ b/bluemath_tk/distributions/pot.py @@ -397,7 +397,7 @@ def studentized_residuals( ax.legend(loc="upper right") if folder is not None: plt.savefig(f"{folder}/StudenRes{it}.png", dpi=300) - plt.show() + # plt.show() plt.close() if fobj > chi2.ppf(1 - sig_level, df=u_values.size - 2) or np.abs( diff --git a/bluemath_tk/distributions/utils/extr_corr_utils.py b/bluemath_tk/distributions/utils/extr_corr_utils.py index cb6014f..78864db 100644 --- a/bluemath_tk/distributions/utils/extr_corr_utils.py +++ b/bluemath_tk/distributions/utils/extr_corr_utils.py @@ -29,7 +29,7 @@ def gpdpoiss_ci_rp_bootstrap( # Vectorized parameter fitting boot_params = np.zeros((B, 3)) for i in range(B): - fit_result = GPD.fit(boot_samples[i], f0=threshold) + fit_result = GPD.fit(boot_samples[i], threshold=threshold) boot_params[i, :] = fit_result.params return_periods = np.array( From 6fc6c985d3e2007ffa585d13d4aff893937e1986 Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 10:29:34 +0100 Subject: [PATCH 09/13] [VCF] Add ExtremeCorrection class --- tests/distributions/test_extr_corr.py | 373 ++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) create mode 100644 tests/distributions/test_extr_corr.py diff --git a/tests/distributions/test_extr_corr.py b/tests/distributions/test_extr_corr.py new file mode 100644 index 0000000..5df91cb --- /dev/null +++ b/tests/distributions/test_extr_corr.py @@ -0,0 +1,373 @@ +import os +import shutil +import tempfile +import unittest + +import numpy as np +import xarray as xr + +from bluemath_tk.distributions.extreme_correction import ExtremeCorrection +from bluemath_tk.distributions.gev import GEV + + +class TestExtremeCorrection(unittest.TestCase): + """Test suite for ExtremeCorrection class""" + + def setUp(self): + """Set up test fixtures""" + np.random.seed(42) + + # Create temporary directory for outputs + self.test_dir = tempfile.mkdtemp() + + # 
Generate synthetic historical data (10 years, daily) + n_years_hist = 10 + n_days = 365 + time_hist = xr.date_range( + start="2000-01-01", periods=n_years_hist * n_days, freq="D", use_cftime=True + ) + + # Generate GEV distributed data + self.loc_true = 5.0 + self.scale_true = 2.0 + self.shape_true = 0.1 + + hist_values = GEV.random( + size=len(time_hist), + loc=self.loc_true, + scale=self.scale_true, + shape=self.shape_true, + random_state=42, + ) + + self.data_hist = xr.Dataset( + { + "hs": (["time"], hist_values), + }, + coords={"time": time_hist}, + ) + + # Generate synthetic simulated data (5 simulations, 20 years each) + n_years_sim = 20 + n_sims = 5 + time_sim = xr.date_range( + start="2010-01-01", periods=n_years_sim * n_days, freq="D", use_cftime=True + ) + + sim_values = np.array( + [ + GEV.random( + size=len(time_sim), + loc=self.loc_true * 0.8, # Biased synthetic data + scale=self.scale_true * 0.9, + shape=self.shape_true * 1.2, + random_state=i, + ) + for i in range(n_sims) + ] + ) + + self.data_sim = xr.Dataset( + { + "hs": (["n_sim", "time"], sim_values), + }, + coords={"n_sim": np.arange(n_sims), "time": time_sim}, + ) + + # Basic configuration + self.corr_config = { + "var": "hs", + "time_var": "time", + "yyyy_var": "time.year", + "freq": 365.25, + "folder": self.test_dir, + } + + self.pot_config = { + "n0": 10, + "min_peak_distance": 2, + "init_threshold": 5.0, + "siglevel": 0.05, + "plot": False, + } + + def tearDown(self): + """Clean up test fixtures""" + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_initialization_am(self): + """Test initialization with Annual Maxima method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, + pot_config=self.pot_config, + method="am", + conf_level=0.95, + ) + + self.assertEqual(ec.method, "am") + self.assertEqual(ec.conf, 0.95) + self.assertEqual(ec.var, "hs") + self.assertTrue(hasattr(ec, "parameters")) + + def test_initialization_pot(self): + """Test initialization with POT method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, + pot_config=self.pot_config, + method="pot", + conf_level=0.90, + ) + + self.assertEqual(ec.method, "pot") + self.assertEqual(ec.conf, 0.90) + self.assertTrue(hasattr(ec, "pot_config")) + self.assertEqual(ec.pot_config["n0"], 10) + + def test_config_validation_missing_key(self): + """Test that missing required config keys raise KeyError""" + invalid_config = {} # Missing required keys + + with self.assertRaises(KeyError): + ExtremeCorrection( + corr_config=invalid_config, pot_config=self.pot_config, method="am" + ) + + def test_config_validation_wrong_type(self): + """Test that wrong config types raise TypeError""" + invalid_config = self.corr_config.copy() + invalid_config["var"] = 123 # Should be string + + with self.assertRaises(TypeError): + ExtremeCorrection( + corr_config=invalid_config, pot_config=self.pot_config, method="am" + ) + + def test_fit_am_method(self): + """Test fitting with Annual Maxima method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist, plot_diagnostic=False) + + # Check that parameters were fitted + self.assertEqual(len(ec.parameters), 3) + self.assertIsNotNone(ec.am_data) + self.assertIsNotNone(ec.pit_data) + self.assertEqual(ec.poiss_parameter, 1) # GEV has lambda=1 + + def test_fit_pot_method(self): + """Test fitting with POT method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="pot" + 
) + + ec.fit(data_hist=self.data_hist, plot_diagnostic=False) + + # Check that parameters were fitted + self.assertEqual(len(ec.parameters), 3) + self.assertIsNotNone(ec.threshold) + self.assertIsNotNone(ec.pot_data) + self.assertGreater(ec.poiss_parameter, 0) + + def test_transform(self): + """Test transform method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + result = ec.transform(data_sim=self.data_sim, prob="unif", random_state=42) + + # Check output structure + self.assertIsInstance(result, xr.Dataset) + self.assertIn("hs_corr", result.variables) + + # Check that corrected data exists + self.assertIsNotNone(ec.sim_pit_data_corrected) + self.assertIsNotNone(ec.sim_am_data_corr) + + def test_fit_transform(self): + """Test fit_transform method""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + result = ec.fit_transform( + data_hist=self.data_hist, + data_sim=self.data_sim, + prob="unif", + random_state=42, + ) + + # Check that both fit and transform were performed + self.assertIsNotNone(ec.parameters) + self.assertIsInstance(result, xr.Dataset) + self.assertIn("hs_corr", result.variables) + + def test_transform_with_ecdf_prob(self): + """Test transform with ECDF probabilities""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + result = ec.transform(data_sim=self.data_sim, prob="ecdf", random_state=42) + + self.assertIsInstance(result, xr.Dataset) + self.assertIn("hs_corr", result.variables) + + def test_test_method(self): + """Test the Cramer-von-Mises test""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + ec.transform(data_sim=self.data_sim, random_state=42) + + test_result = ec.test() + + # Check test result structure + self.assertIsInstance(test_result, dict) + self.assertIn("Statistic", test_result) + self.assertIn("P-value", test_result) + self.assertGreaterEqual(test_result["P-value"], 0) + self.assertLessEqual(test_result["P-value"], 1) + + def test_correlations(self): + """Test correlation calculations""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + ec.transform(data_sim=self.data_sim, random_state=42) + + corr_result = ec.correlations() + + # Check correlation result structure + self.assertIsInstance(corr_result, dict) + self.assertIn("Spearman", corr_result) + self.assertIn("Kendall", corr_result) + self.assertIn("Pearson", corr_result) + + # Check that correlations are in valid range + for corr_type, corr_value in corr_result.items(): + self.assertGreaterEqual(corr_value, -1) + self.assertLessEqual(corr_value, 1) + + def test_plot_methods(self): + """Test plotting methods""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + ec.transform(data_sim=self.data_sim, random_state=42) + + # Test hist_retper_plot + fig1, ax1 = ec.hist_retper_plot() + self.assertIsNotNone(fig1) + self.assertIsNotNone(ax1) + + # Test sim_retper_plot + fig2, ax2 = ec.sim_retper_plot() + self.assertIsNotNone(fig2) + self.assertIsNotNone(ax2) + + # Test plot method + figs, axes = ec.plot() + self.assertEqual(len(figs), 2) + self.assertEqual(len(axes), 2) + + def 
test_preprocess_data_single_dataset(self): + """Test data preprocessing with single dataset""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + pit_data, am_data = ec._preprocess_data( + data=self.data_hist, var="hs", sim=False, join_sims=True + ) + + self.assertIsInstance(pit_data, np.ndarray) + self.assertIsInstance(am_data, np.ndarray) + self.assertGreater(len(pit_data), len(am_data)) + + def test_preprocess_data_multiple_sims(self): + """Test data preprocessing with multiple simulations""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + pit_data, am_data = ec._preprocess_data( + data=self.data_sim, var="hs", sim=True, join_sims=True + ) + + self.assertIsInstance(pit_data, np.ndarray) + self.assertIsInstance(am_data, np.ndarray) + # Check that all simulations are joined + expected_length = len(self.data_sim.time) * len(self.data_sim.n_sim) + self.assertEqual(len(pit_data), expected_length) + + def test_parameters_shape_am(self): + """Test that fitted parameters have correct shape for AM""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + ec.fit(data_hist=self.data_hist) + + # GEV has 3 parameters: loc, scale, shape + self.assertEqual(len(ec.parameters), 3) + self.assertGreater(ec.parameters[1], 0) # scale > 0 + + def test_parameters_shape_pot(self): + """Test that fitted parameters have correct shape for POT""" + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="pot" + ) + + ec.fit(data_hist=self.data_hist) + + # GPD has 3 parameters: threshold, scale, shape + self.assertEqual(len(ec.parameters), 3) + self.assertGreater(ec.parameters[1], 0) # scale > 0 + + def test_no_correction_when_pvalue_high(self): + """Test that no correction is applied when p-value is high""" + # Create data that already fits well + ec = ExtremeCorrection( + corr_config=self.corr_config, pot_config=self.pot_config, method="am" + ) + + # Fit with same distribution + ec.fit(data_hist=self.data_hist) + + # Create sim data from same distribution + time_sim = xr.date_range( + start="2010-01-01", periods=20 * 365, freq="D", use_cftime=True + ) + sim_values_good = GEV.random( + size=len(time_sim), + loc=ec.parameters[0], + scale=ec.parameters[1], + shape=ec.parameters[2], + random_state=100, + ) + data_sim_good = xr.Dataset( + {"hs": (["n_sim", "time"], sim_values_good.reshape(1, -1))}, + coords={"n_sim": [0], "time": time_sim}, + ) + + result = ec.transform(data_sim=data_sim_good, siglevel=0.05, random_state=42) + + # If p-value > siglevel, data should not be corrected + if ec.p_value > 0.05: + np.testing.assert_array_equal(ec.sim_am_data_corr, ec.sim_am_data) + + +if __name__ == "__main__": + unittest.main() From 1a92e6f06e6994d4b322d0d6af36060cf75cce0f Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 10:33:15 +0100 Subject: [PATCH 10/13] [VCF] Update ExtremeCorrection test --- tests/distributions/test_extr_corr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/distributions/test_extr_corr.py b/tests/distributions/test_extr_corr.py index 5df91cb..aa6962a 100644 --- a/tests/distributions/test_extr_corr.py +++ b/tests/distributions/test_extr_corr.py @@ -362,7 +362,7 @@ def test_no_correction_when_pvalue_high(self): coords={"n_sim": [0], "time": time_sim}, ) - result = ec.transform(data_sim=data_sim_good, siglevel=0.05, random_state=42) + _ = 
ec.transform(data_sim=data_sim_good, siglevel=0.05, random_state=42) # If p-value > siglevel, data should not be corrected if ec.p_value > 0.05: From 4ca59c11b2f80cc56329963a9404e3ac6f09a681 Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 10:42:16 +0100 Subject: [PATCH 11/13] [VCF] Update GEV and GPD --- bluemath_tk/distributions/gev.py | 139 ++++--------------------------- bluemath_tk/distributions/gpd.py | 32 +++---- tests/distributions/test_gpd.py | 6 +- 3 files changed, 35 insertions(+), 142 deletions(-) diff --git a/bluemath_tk/distributions/gev.py b/bluemath_tk/distributions/gev.py index 7b5fc9e..cceaa7b 100644 --- a/bluemath_tk/distributions/gev.py +++ b/bluemath_tk/distributions/gev.py @@ -1,7 +1,7 @@ from typing import Dict, List import numpy as np -from scipy.special import gamma +from scipy.stats import genextreme from ._base_distributions import BaseDistribution, FitResult, fit_dist @@ -117,22 +117,7 @@ def pdf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - y = (x - loc) / scale - - # Gumbel case (shape = 0) - if shape == 0.0: - pdf = (1 / scale) * (np.exp(-y) * np.exp(-np.exp(-y))) - - # General case (Weibull and Frechet, shape != 0) - else: - pdf = np.full_like(x, 0, dtype=float) # 0 - yy = 1 + shape * y - yymask = yy > 0 - pdf[yymask] = (1 / scale) * ( - yy[yymask] ** (-1 - (1 / shape)) * np.exp(-(yy[yymask] ** (-1 / shape))) - ) - - return pdf + return genextreme.pdf(x, -shape, loc=loc, scale=scale) @staticmethod def cdf( @@ -167,17 +152,7 @@ def cdf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - y = (x - loc) / scale - - # Gumbel case (shape = 0) - if shape == 0.0: - p = np.exp(-np.exp(-y)) - - # General case (Weibull and Frechet, shape != 0) - else: - p = np.exp(-(np.maximum(1 + shape * y, 0) ** (-1 / shape))) - - return p + return genextreme.cdf(x, -shape, loc=loc, scale=scale) @staticmethod def sf( @@ -212,9 +187,7 @@ def sf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - sp = 1 - GEV.cdf(x, loc=loc, scale=scale, shape=shape) - - return sp + return genextreme.sf(x, -shape, loc=loc, scale=scale) @staticmethod def qf( @@ -255,15 +228,7 @@ def qf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Gumbel case (shape = 0) - if shape == 0.0: - q = loc - scale * np.log(-np.log(p)) - - # General case (Weibull and Frechet, shape != 0) - else: - q = loc + scale * ((-np.log(p)) ** (-shape) - 1) / shape - - return q + return genextreme.ppf(p, -shape, loc=loc, scale=scale) @staticmethod def nll( @@ -291,35 +256,12 @@ def nll( """ if scale <= 0: - nll = np.inf # Return a large value for invalid scale + return np.inf # Return a large value for invalid scale else: - y = (data - loc) / scale - - # # Gumbel case (shape = 0) - # if shape == 0.0: - # pass - # nll = data.shape[0] * np.log(scale) + np.sum( - # np.exp(-y) + np.sum(-y) - # ) # Gumbel case - - # # General case (Weibull and Frechet, shape != 0) - # else: - - shape = ( - np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8) - ) # Avoid division by zero - y = 1 + shape * y - if any(y <= 0): - nll = np.inf # Return a large value for invalid y - else: - nll = ( - data.shape[0] * np.log(scale) - + np.sum(y ** (-1 / shape)) - + (1 / shape + 1) * np.sum(np.log(y)) - ) - - return nll + return -np.sum( + genextreme.logpdf(data, -shape, loc=loc, scale=scale), axis=0 + ) @staticmethod def fit(data: np.ndarray, **kwargs) -> FitResult: @@ -385,22 +327,9 @@ def random( if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Set 
random state if provided - if random_state is not None: - np.random.seed(random_state) - - # Generate uniform random numbers - u = np.random.uniform(0, 1, size) - - # Gumbel case (shape = 0) - if shape == 0.0: - x = loc - scale * np.log(-np.log(u)) - - # General case (Weibull and Frechet, shape != 0) - else: - x = loc + scale * ((-np.log(u)) ** (-shape) - 1) / shape - - return x + return genextreme.rvs( + -shape, loc=loc, scale=scale, size=size, random_state=random_state + ) @staticmethod def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: @@ -431,21 +360,7 @@ def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - eu_cons = np.euler_gamma # Euler-Mascheroni constant - - # Gumbel case (shape = 0) - if shape == 0.0: - mean = loc + scale * eu_cons - - # General case (Weibull and Frechet, shape != 0 and shape < 1) - elif shape != 0.0 and shape < 1: - mean = loc + scale * (gamma(1 - shape) - 1) / shape - - # Shape >= 1 case - else: - mean = np.inf - - return mean + return genextreme.mean(-shape, loc=loc, scale=scale) @staticmethod def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: @@ -476,13 +391,7 @@ def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - if shape == 0.0: - median = loc - scale * np.log(np.log(2)) - - else: - median = loc + scale * ((np.log(2)) ** (-shape) - 1) / shape - - return median + return genextreme.median(-shape, loc=loc, scale=scale) @staticmethod def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: @@ -513,21 +422,7 @@ def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - # Gumbel case (shape = 0) - if shape == 0.0: - var = (np.pi**2 / 6) * scale**2 - - elif shape != 0.0 and shape < 0.5: - var = ( - (scale**2) - * (gamma(1 - 2 * shape) - (gamma(1 - shape) ** 2)) - / (shape**2) - ) - - else: - var = np.inf - - return var + return genextreme.var(-shape, loc=loc, scale=scale) @staticmethod def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: @@ -559,9 +454,7 @@ def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: if scale <= 0: raise ValueError("Scale parameter must be > 0") - std = np.sqrt(GEV.variance(loc, scale, shape)) - - return std + return genextreme.std(-shape, loc=loc, scale=scale) @staticmethod def stats( diff --git a/bluemath_tk/distributions/gpd.py b/bluemath_tk/distributions/gpd.py index c74a968..9bf5ff4 100644 --- a/bluemath_tk/distributions/gpd.py +++ b/bluemath_tk/distributions/gpd.py @@ -3,7 +3,7 @@ import numpy as np from scipy.stats import genpareto -from ._base_distributions import BaseDistribution, FitResult, fit_dist +from ._base_distributions import BaseDistribution, FitResult class GPD(BaseDistribution): @@ -116,7 +116,7 @@ def pdf( if scale <= 0: raise ValueError("Scale parameter must be > 0") - + return genpareto.pdf(x, c=shape, loc=threshold, scale=scale) @staticmethod @@ -222,7 +222,7 @@ def qf( If scale is not greater than 0. 
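
        Examples
        --------
        Illustrative call; probabilities must lie strictly inside (0, 1):

        >>> import numpy as np
        >>> GPD.qf(np.array([0.5, 0.9, 0.99]), threshold=0.0, scale=1.0, shape=0.1)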
""" - if np.min(p) < 0 or np.max(p) > 1: + if np.min(p) <= 0 or np.max(p) >= 1: raise ValueError("Probabilities must be in the range (0, 1)") if scale <= 0: @@ -259,7 +259,9 @@ def nll( if np.any(data < threshold): return np.inf - return -np.sum(genpareto.logpdf(data, c=shape, loc=threshold, scale=scale), axis=0) + return -np.sum( + genpareto.logpdf(data, c=shape, loc=threshold, scale=scale), axis=0 + ) @staticmethod def fit(data: np.ndarray, threshold: float, **kwargs) -> FitResult: @@ -306,14 +308,9 @@ def obj(params): return GPD.nll(exceedances, threshold=threshold, scale=scale, shape=shape) # Perform optimization - from scipy.optimize import minimize, OptimizeResult - result = minimize( - fun=obj, - x0=x0, - method=method, - bounds=bounds, - options=options - ) + from scipy.optimize import OptimizeResult, minimize + + result = minimize(fun=obj, x0=x0, method=method, bounds=bounds, options=options) # Create modified result modified_result = OptimizeResult( @@ -326,7 +323,7 @@ def obj(params): # Return the fitting result return FitResult(GPD, exceedances, modified_result) - + @staticmethod def random( size: int, @@ -367,7 +364,9 @@ def random( if scale <= 0: raise ValueError("Scale parameter must be > 0") - return genpareto.rvs(c=shape, loc=threshold, scale=scale, size=size, random_state=random_state) + return genpareto.rvs( + c=shape, loc=threshold, scale=scale, size=size, random_state=random_state + ) @staticmethod def mean(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: @@ -436,7 +435,9 @@ def median(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> fl return genpareto.median(c=shape, loc=threshold, scale=scale) @staticmethod - def variance(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float: + def variance( + threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 + ) -> float: """ Variance @@ -501,7 +502,6 @@ def std(threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float return genpareto.std(c=shape, loc=threshold, scale=scale) - @staticmethod def stats( threshold: float = 0.0, scale: float = 1.0, shape: float = 0.0 diff --git a/tests/distributions/test_gpd.py b/tests/distributions/test_gpd.py index 6cf829b..7ecde43 100644 --- a/tests/distributions/test_gpd.py +++ b/tests/distributions/test_gpd.py @@ -104,12 +104,12 @@ def test_invalid_scale(self): def test_fit(self): # Generate data using specific parameters - loc, scale, shape = 0.5, 1.5, 0.2 - data = GPD.random(1000, loc, scale, shape, random_state=42) + threshold, scale, shape = 0.5, 1.5, 0.2 + data = GPD.random(1000, threshold, scale, shape, random_state=42) # Fit the GPD distribution to the data # loc is fixed at 0.0 - fit_result = GPD.fit(data - loc, f0=0.0) + fit_result = GPD.fit(data, threshold=threshold) # Check the fit result self.assertIsInstance(fit_result, FitResult) From 287be23aa2b21eda5854883505113a1986e9a358 Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 11:16:45 +0100 Subject: [PATCH 12/13] [VCF] Add number of observations per year in return period plots --- .../distributions/_base_distributions.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/bluemath_tk/distributions/_base_distributions.py b/bluemath_tk/distributions/_base_distributions.py index 469c2e9..4a831cc 100644 --- a/bluemath_tk/distributions/_base_distributions.py +++ b/bluemath_tk/distributions/_base_distributions.py @@ -399,7 +399,7 @@ def summary(self): print(f"Negative Log-Likelihood value: 
{self.nll:.4f}") print(f"{self.message}") - def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.axes]: + def plot(self, ax: plt.axes = None, plot_type="all", npy=1) -> Tuple[plt.figure, plt.axes]: """ Plots of fitting results: PP-plot, QQ-plot, histogram with fitted distribution, and return period plot. Parameters @@ -410,6 +410,8 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax Type of plot to create. Options are "hist" for histogram, "pp" for P-P plot, "qq" for Q-Q plot, "return_period" for return period plot, or "all" for all plots. Default is "all". + npy : int, optional + Number of observations per year. Default is 1. Returns ------- @@ -426,7 +428,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax self.pp(ax=axs[0, 0]) self.qq(ax=axs[0, 1]) self.hist(ax=axs[1, 0]) - self.return_period(ax=axs[1, 1]) + self.return_period(ax=axs[1, 1], npy=npy) plt.tight_layout() return fig, axs elif plot_type == "hist": @@ -436,7 +438,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax elif plot_type == "qq": return self.qq() elif plot_type == "return_period": - return self.return_period() + return self.return_period(npy=npy) else: raise ValueError( "Invalid plot type. Use 'hist', 'pp', 'qq', 'return_period', or 'all'." @@ -541,7 +543,7 @@ def hist(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: return fig, ax - def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: + def return_period(self, ax: plt.axes = None, npy=1) -> Tuple[plt.figure, plt.axes]: """ Return period plot of the fitted distribution. @@ -549,26 +551,28 @@ def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: ---------- ax : matplotlib.axes.Axes, optional Axes to plot on. If None, a new figure and axes will be created. + npy : int, optional + Number of observations per year. Default is 1. 
""" if ax is None: fig, ax = plt.subplots(figsize=(8, 6)) else: fig = None - return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000]) - ecdf_fitted = 1 - 1/return_years + return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000, 10000]) + ecdf_fitted = 1 - 1/(return_years) sorted_data = np.sort(self.data) exceedance_prob = 1 - self.ecdf - return_period = 1 / exceedance_prob + return_period = 1 / (exceedance_prob) ax.plot( - return_years, + return_years / npy, self.dist.qf(ecdf_fitted, *self.params), color="tab:red", label="Fitted Distribution", ) ax.plot( - return_period, + return_period / npy, sorted_data, marker="o", linestyle="", @@ -580,7 +584,7 @@ def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]: ax.set_xticks([1, 2, 5, 10, 25, 50, 100, 250, 1000, 10000]) ax.set_xticklabels([1, 2, 5, 10, 25, 50, 100, 500, 1000, 10000]) # ax.set_xlim(right=np.max(return_period) * 1.2) - ax.set_xlabel("Return Period") + ax.set_xlabel("Return Period (Years)") ax.set_ylabel("Data Values") ax.set_title("Return Period Plot") ax.legend() From 6137212b27c9d9d7a8003bce1d86cd00d0bd88f1 Mon Sep 17 00:00:00 2001 From: Colladito Date: Thu, 20 Nov 2025 12:26:21 +0100 Subject: [PATCH 13/13] [VCF] Some improvements in pot and extr_corr_utils --- bluemath_tk/distributions/pot.py | 2 +- bluemath_tk/distributions/utils/extr_corr_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bluemath_tk/distributions/pot.py b/bluemath_tk/distributions/pot.py index d96092d..454a85b 100644 --- a/bluemath_tk/distributions/pot.py +++ b/bluemath_tk/distributions/pot.py @@ -318,7 +318,7 @@ def fit( _, _, _, self.pks, self.pks_idx, _ = pot( self.data, self.threshold, self.n0, self.min_peak_distance ) - return self.threshold, self.pks, self.pks_idx + return self.threshold.item(), self.pks, self.pks_idx def studentized_residuals( self, diff --git a/bluemath_tk/distributions/utils/extr_corr_utils.py b/bluemath_tk/distributions/utils/extr_corr_utils.py index 78864db..a916423 100644 --- a/bluemath_tk/distributions/utils/extr_corr_utils.py +++ b/bluemath_tk/distributions/utils/extr_corr_utils.py @@ -30,7 +30,7 @@ def gpdpoiss_ci_rp_bootstrap( boot_params = np.zeros((B, 3)) for i in range(B): fit_result = GPD.fit(boot_samples[i], threshold=threshold) - boot_params[i, :] = fit_result.params + boot_params[i, :] = np.asarray(fit_result.params) return_periods = np.array( [