Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions bluemath_tk/distributions/_base_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def summary(self):
print(f"Negative Log-Likelihood value: {self.nll:.4f}")
print(f"{self.message}")

def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.axes]:
def plot(self, ax: plt.axes = None, plot_type="all", npy=1) -> Tuple[plt.figure, plt.axes]:
"""
Plots of fitting results: PP-plot, QQ-plot, histogram with fitted distribution, and return period plot.
Parameters
Expand All @@ -410,6 +410,8 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
Type of plot to create. Options are "hist" for histogram, "pp" for P-P plot,
"qq" for Q-Q plot, "return_period" for return period plot, or "all" for all plots.
Default is "all".
npy : int, optional
Number of observations per year. Default is 1.

Returns
-------
Expand All @@ -426,7 +428,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
self.pp(ax=axs[0, 0])
self.qq(ax=axs[0, 1])
self.hist(ax=axs[1, 0])
self.return_period(ax=axs[1, 1])
self.return_period(ax=axs[1, 1], npy=npy)
plt.tight_layout()
return fig, axs
elif plot_type == "hist":
Expand All @@ -436,7 +438,7 @@ def plot(self, ax: plt.axes = None, plot_type="all") -> Tuple[plt.figure, plt.ax
elif plot_type == "qq":
return self.qq()
elif plot_type == "return_period":
return self.return_period()
return self.return_period(npy=npy)
else:
raise ValueError(
"Invalid plot type. Use 'hist', 'pp', 'qq', 'return_period', or 'all'."
Expand Down Expand Up @@ -541,34 +543,36 @@ def hist(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:

return fig, ax

def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:
def return_period(self, ax: plt.axes = None, npy=1) -> Tuple[plt.figure, plt.axes]:
"""
Return period plot of the fitted distribution.

Parameters
----------
ax : matplotlib.axes.Axes, optional
Axes to plot on. If None, a new figure and axes will be created.
npy : int, optional
Number of observations per year. Default is 1.
"""
if ax is None:
fig, ax = plt.subplots(figsize=(8, 6))
else:
fig = None

return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000])
ecdf_fitted = 1 - 1/return_years
return_years = np.asarray([1.001, 1.1, 1.2, 1.3, 1.4, 1.5, 1.75, 2, 3, 4, 5, 7.5, 10, 15, 20, 25, 50, 100, 250, 500, 1000, 10000])
ecdf_fitted = 1 - 1/(return_years)
sorted_data = np.sort(self.data)
exceedance_prob = 1 - self.ecdf
return_period = 1 / exceedance_prob
return_period = 1 / (exceedance_prob)

ax.plot(
return_years,
return_years / npy,
self.dist.qf(ecdf_fitted, *self.params),
color="tab:red",
label="Fitted Distribution",
)
ax.plot(
return_period,
return_period / npy,
sorted_data,
marker="o",
linestyle="",
Expand All @@ -580,7 +584,7 @@ def return_period(self, ax: plt.axes = None) -> Tuple[plt.figure, plt.axes]:
ax.set_xticks([1, 2, 5, 10, 25, 50, 100, 250, 1000, 10000])
ax.set_xticklabels([1, 2, 5, 10, 25, 50, 100, 500, 1000, 10000])
# ax.set_xlim(right=np.max(return_period) * 1.2)
ax.set_xlabel("Return Period")
ax.set_xlabel("Return Period (Years)")
ax.set_ylabel("Data Values")
ax.set_title("Return Period Plot")
ax.legend()
Expand Down
2 changes: 1 addition & 1 deletion bluemath_tk/distributions/extreme_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def fit(
self.threshold, self.pot_data, pot_idx = opt_threshold.fit()
self.poiss_parameter = self.pot_data.size / self.am_data.size

fit_result = GPD.fit(self.pot_data, f0=self.threshold)
fit_result = GPD.fit(self.pot_data, threshold=self.threshold)

# If Annual Maxima used in fitting step
if self.method == "am":
Expand Down
139 changes: 16 additions & 123 deletions bluemath_tk/distributions/gev.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Dict, List

import numpy as np
from scipy.special import gamma
from scipy.stats import genextreme

from ._base_distributions import BaseDistribution, FitResult, fit_dist

Expand Down Expand Up @@ -117,22 +117,7 @@ def pdf(
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

y = (x - loc) / scale

# Gumbel case (shape = 0)
if shape == 0.0:
pdf = (1 / scale) * (np.exp(-y) * np.exp(-np.exp(-y)))

# General case (Weibull and Frechet, shape != 0)
else:
pdf = np.full_like(x, 0, dtype=float) # 0
yy = 1 + shape * y
yymask = yy > 0
pdf[yymask] = (1 / scale) * (
yy[yymask] ** (-1 - (1 / shape)) * np.exp(-(yy[yymask] ** (-1 / shape)))
)

return pdf
return genextreme.pdf(x, -shape, loc=loc, scale=scale)

@staticmethod
def cdf(
Expand Down Expand Up @@ -167,17 +152,7 @@ def cdf(
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

y = (x - loc) / scale

# Gumbel case (shape = 0)
if shape == 0.0:
p = np.exp(-np.exp(-y))

# General case (Weibull and Frechet, shape != 0)
else:
p = np.exp(-(np.maximum(1 + shape * y, 0) ** (-1 / shape)))

return p
return genextreme.cdf(x, -shape, loc=loc, scale=scale)

@staticmethod
def sf(
Expand Down Expand Up @@ -212,9 +187,7 @@ def sf(
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

sp = 1 - GEV.cdf(x, loc=loc, scale=scale, shape=shape)

return sp
return genextreme.sf(x, -shape, loc=loc, scale=scale)

@staticmethod
def qf(
Expand Down Expand Up @@ -255,15 +228,7 @@ def qf(
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

# Gumbel case (shape = 0)
if shape == 0.0:
q = loc - scale * np.log(-np.log(p))

# General case (Weibull and Frechet, shape != 0)
else:
q = loc + scale * ((-np.log(p)) ** (-shape) - 1) / shape

return q
return genextreme.ppf(p, -shape, loc=loc, scale=scale)

@staticmethod
def nll(
Expand Down Expand Up @@ -291,35 +256,12 @@ def nll(
"""

if scale <= 0:
nll = np.inf # Return a large value for invalid scale
return np.inf # Return a large value for invalid scale

else:
y = (data - loc) / scale

# # Gumbel case (shape = 0)
# if shape == 0.0:
# pass
# nll = data.shape[0] * np.log(scale) + np.sum(
# np.exp(-y) + np.sum(-y)
# ) # Gumbel case

# # General case (Weibull and Frechet, shape != 0)
# else:

shape = (
np.maximum(shape, 1e-8) if shape > 0 else np.minimum(shape, -1e-8)
) # Avoid division by zero
y = 1 + shape * y
if any(y <= 0):
nll = np.inf # Return a large value for invalid y
else:
nll = (
data.shape[0] * np.log(scale)
+ np.sum(y ** (-1 / shape))
+ (1 / shape + 1) * np.sum(np.log(y))
)

return nll
return -np.sum(
genextreme.logpdf(data, -shape, loc=loc, scale=scale), axis=0
)

@staticmethod
def fit(data: np.ndarray, **kwargs) -> FitResult:
Expand Down Expand Up @@ -385,22 +327,9 @@ def random(
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

# Set random state if provided
if random_state is not None:
np.random.seed(random_state)

# Generate uniform random numbers
u = np.random.uniform(0, 1, size)

# Gumbel case (shape = 0)
if shape == 0.0:
x = loc - scale * np.log(-np.log(u))

# General case (Weibull and Frechet, shape != 0)
else:
x = loc + scale * ((-np.log(u)) ** (-shape) - 1) / shape

return x
return genextreme.rvs(
-shape, loc=loc, scale=scale, size=size, random_state=random_state
)

@staticmethod
def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
Expand Down Expand Up @@ -431,21 +360,7 @@ def mean(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

eu_cons = np.euler_gamma # Euler-Mascheroni constant

# Gumbel case (shape = 0)
if shape == 0.0:
mean = loc + scale * eu_cons

# General case (Weibull and Frechet, shape != 0 and shape < 1)
elif shape != 0.0 and shape < 1:
mean = loc + scale * (gamma(1 - shape) - 1) / shape

# Shape >= 1 case
else:
mean = np.inf

return mean
return genextreme.mean(-shape, loc=loc, scale=scale)

@staticmethod
def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
Expand Down Expand Up @@ -476,13 +391,7 @@ def median(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

if shape == 0.0:
median = loc - scale * np.log(np.log(2))

else:
median = loc + scale * ((np.log(2)) ** (-shape) - 1) / shape

return median
return genextreme.median(-shape, loc=loc, scale=scale)

@staticmethod
def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
Expand Down Expand Up @@ -513,21 +422,7 @@ def variance(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

# Gumbel case (shape = 0)
if shape == 0.0:
var = (np.pi**2 / 6) * scale**2

elif shape != 0.0 and shape < 0.5:
var = (
(scale**2)
* (gamma(1 - 2 * shape) - (gamma(1 - shape) ** 2))
/ (shape**2)
)

else:
var = np.inf

return var
return genextreme.var(-shape, loc=loc, scale=scale)

@staticmethod
def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
Expand Down Expand Up @@ -559,9 +454,7 @@ def std(loc: float = 0.0, scale: float = 1.0, shape: float = 0.0) -> float:
if scale <= 0:
raise ValueError("Scale parameter must be > 0")

std = np.sqrt(GEV.variance(loc, scale, shape))

return std
return genextreme.std(-shape, loc=loc, scale=scale)

@staticmethod
def stats(
Expand Down
Loading