diff --git a/CHANGELOG.md b/CHANGELOG.md index a1e06c193..5503705ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Attention: The newest changes should be on top --> - ENH: Enable only radial burning [#815](https://github.com/RocketPy-Team/RocketPy/pull/815) - ENH: Add thrustcurve api integration to retrieve motor eng data [#870](https://github.com/RocketPy-Team/RocketPy/pull/870) - ENH: custom warning no motor or aerosurface [#871](https://github.com/RocketPy-Team/RocketPy/pull/871) +- ENH: Implement Bootstrapping for Confidence Interval Estimation [#897](https://github.com/RocketPy-Team/RocketPy/pull/897) ### Changed diff --git a/docs/user/mrs.rst b/docs/user/mrs.rst index ec598960e..e58f7a784 100644 --- a/docs/user/mrs.rst +++ b/docs/user/mrs.rst @@ -204,6 +204,37 @@ Finally, we can compare the ellipses for the apogees and landing points using th Note we can pass along parameters used in the usual `ellipses` method of the `MonteCarlo` class, in this case the `ylim` argument to expand the y-axis limits. + +Calculating Confidence Intervals +-------------------------------- + +Beyond visual comparisons, you may want to calculate statistical confidence intervals +for specific attributes of the flight (e.g., apogee, max velocity) based on the +resampled data. Since the resulting data is loaded into a ``MonteCarlo`` object, +you can use its ``estimate_confidence_interval`` method to compute these intervals using the bootstrap method. + +The following example shows how to calculate the 95% confidence interval for the +mean of the apogee using the ``mrs_results`` object created earlier: + +.. 
jupyter-execute:: + + # Calculate the 95% Confidence Interval for the mean apogee + # We pass np.mean as the statistic to be evaluated + apogee_ci = mrs_results.estimate_confidence_interval( + attribute="apogee", + statistic=np.mean, + confidence_level=0.95, + n_resamples=10000 + ) + + print(f"95% Confidence Interval for Apogee Mean: {apogee_ci}") + +You can use any statistic function that accepts an array of data, such as ``np.median`` +or ``np.std``. + + + + Time Comparison --------------- diff --git a/rocketpy/simulation/monte_carlo.py b/rocketpy/simulation/monte_carlo.py index b94c165b3..e10789a7d 100644 --- a/rocketpy/simulation/monte_carlo.py +++ b/rocketpy/simulation/monte_carlo.py @@ -22,6 +22,7 @@ import numpy as np import simplekml +from scipy.stats import bootstrap from rocketpy._encoders import RocketPyEncoder from rocketpy.plots.monte_carlo_plots import _MonteCarloPlots @@ -463,6 +464,67 @@ def __run_single_simulation(self): time_overshoot=self.flight.time_overshoot, ) + def estimate_confidence_interval( + self, + attribute, + statistic=np.mean, + confidence_level=0.95, + n_resamples=1000, + random_state=None, + ): + """ + Estimates the confidence interval for a specific attribute of the results + using the bootstrap method. + + Parameters + ---------- + attribute : str + The name of the attribute stored in self.results (e.g., "apogee", "max_velocity"). + statistic : callable, optional + A function that computes the statistic of interest (e.g., np.mean, np.std). + Default is np.mean. + confidence_level : float, optional + The confidence level for the interval (between 0 and 1). Default is 0.95. + n_resamples : int, optional + The number of resamples to perform. Default is 1000. + random_state : int or None, optional + Seed for the random number generator to ensure reproducibility. If None (default), the random number generator is not seeded. 
+ + Returns + ------- + confidence_interval : ConfidenceInterval + An object containing the low and high bounds of the confidence interval. + Access via .low and .high. + """ + if attribute not in self.results: + available = list(self.results.keys()) + raise ValueError( + f"Attribute '{attribute}' not found in results. Available attributes: {available}" + ) + + if not 0 < confidence_level < 1: + raise ValueError( + f"confidence_level must be between 0 and 1, got {confidence_level}" + ) + + if not isinstance(n_resamples, int) or n_resamples <= 0: + raise ValueError( + f"n_resamples must be a positive integer, got {n_resamples}" + ) + + data = (np.array(self.results[attribute]),) + + res = bootstrap( + data, + statistic, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + method="percentile", + ) + + return res.confidence_interval + def __evaluate_flight_inputs(self, sim_idx): """Evaluates the inputs of a single flight simulation. diff --git a/tests/unit/simulation/test_monte_carlo.py b/tests/unit/simulation/test_monte_carlo.py index f168b8bfe..5595e46bb 100644 --- a/tests/unit/simulation/test_monte_carlo.py +++ b/tests/unit/simulation/test_monte_carlo.py @@ -1,5 +1,8 @@ import matplotlib as plt import numpy as np +import pytest + +from rocketpy.simulation import MonteCarlo plt.rcParams.update({"figure.max_open_warning": 0}) @@ -60,3 +63,125 @@ def test_stochastic_calisto_create_object_with_static_margin(stochastic_calisto) assert np.isclose(np.mean(all_margins), 2.2625350013000434, rtol=0.15) assert np.isclose(np.std(all_margins), 0.1, atol=0.2) + + +class MockMonteCarlo(MonteCarlo): + """Create a mock class to test the method without running a real simulation""" + + def __init__(self): + # pylint: disable=super-init-not-called + + # Simulate pre-calculated results + # Example: a normal distribution centered on 100 for the apogee + self.results = { + "apogee": [98, 102, 100, 99, 101, 100, 97, 103], + "max_velocity": [250, 
255, 245, 252, 248], + "single_point": [100], + "empty_attribute": [], + } + + +def test_estimate_confidence_interval_contains_known_mean(): + """Checks that the confidence interval contains the known mean.""" + mc = MockMonteCarlo() + + ci = mc.estimate_confidence_interval("apogee", confidence_level=0.95) + + assert ci.low < 100 < ci.high + assert ci.low < ci.high + + +def test_estimate_confidence_interval_supports_custom_statistic(): + """Checks that the statistic can be changed (e.g., standard deviation instead of mean).""" + mc = MockMonteCarlo() + + ci_std = mc.estimate_confidence_interval("apogee", statistic=np.std) + + assert ci_std.low > 0 + assert ci_std.low < ci_std.high + + +def test_estimate_confidence_interval_raises_value_error_when_attribute_missing(): + """Checks that the code raises an error if the key does not exist.""" + mc = MockMonteCarlo() + + # Request a variable that does not exist ("altitude" is not in our mock) + with pytest.raises(ValueError) as excinfo: + mc.estimate_confidence_interval("altitude") + + assert "not found in results" in str(excinfo.value) + + +def test_estimate_confidence_interval_increases_width_with_higher_confidence_level(): + """Checks that a higher confidence level yields a wider interval.""" + mc = MockMonteCarlo() + + ci_90 = mc.estimate_confidence_interval("apogee", confidence_level=0.90) + width_90 = ci_90.high - ci_90.low + + ci_99 = mc.estimate_confidence_interval("apogee", confidence_level=0.99) + width_99 = ci_99.high - ci_99.low + + # The more confident we want to be (99%), the wider the interval must be + assert width_99 >= width_90 + + +def test_estimate_confidence_interval_raises_value_error_when_confidence_level_out_of_bounds(): + """Checks that validation fails if confidence_level is not strictly between 0 and 1.""" + mc = MockMonteCarlo() + + # Case 1: Value <= 0 + with pytest.raises(ValueError, match="confidence_level must be between 0 and 1"): + mc.estimate_confidence_interval("apogee", 
confidence_level=0) + + with pytest.raises(ValueError, match="confidence_level must be between 0 and 1"): + mc.estimate_confidence_interval("apogee", confidence_level=-0.5) + + # Case 2: Value >= 1 + with pytest.raises(ValueError, match="confidence_level must be between 0 and 1"): + mc.estimate_confidence_interval("apogee", confidence_level=1) + + with pytest.raises(ValueError, match="confidence_level must be between 0 and 1"): + mc.estimate_confidence_interval("apogee", confidence_level=1.5) + + +def test_estimate_confidence_interval_raises_value_error_when_n_resamples_invalid(): + """Checks that validation fails if n_resamples is not a positive integer.""" + mc = MockMonteCarlo() + + # Case 1: Not an integer (e.g. float) + with pytest.raises(ValueError, match="n_resamples must be a positive integer"): + mc.estimate_confidence_interval("apogee", n_resamples=1000.5) + + # Case 2: Zero or Negative + with pytest.raises(ValueError, match="n_resamples must be a positive integer"): + mc.estimate_confidence_interval("apogee", n_resamples=0) + + with pytest.raises(ValueError, match="n_resamples must be a positive integer"): + mc.estimate_confidence_interval("apogee", n_resamples=-100) + + +def test_estimate_confidence_interval_raises_value_error_on_empty_data_list(): + """Checks behavior when the attribute exists but contains no data (empty list).""" + mc = MockMonteCarlo() + + with pytest.raises(ValueError): + mc.estimate_confidence_interval("empty_attribute") + + +def test_estimate_confidence_interval_handles_single_data_point(): + """Checks behavior with only one data point. 
Bootstrap requires at least two observations, so a ValueError is expected.""" + mc = MockMonteCarlo() + + with pytest.raises(ValueError): # bootstrap needs two or more values + mc.estimate_confidence_interval("single_point", n_resamples=50) + + +def test_estimate_confidence_interval_raises_type_error_for_invalid_statistic(): + """Checks that passing a non-callable object (like a string/int) as statistic raises TypeError.""" + mc = MockMonteCarlo() + with pytest.raises(TypeError): + mc.estimate_confidence_interval("apogee", statistic=1) + + with pytest.raises(TypeError): + mc.estimate_confidence_interval("apogee", statistic="not_a_function")