From 15f5438fbbff4ae2f9197e7e82e05d7632c961e3 Mon Sep 17 00:00:00 2001 From: Felix Oesterle Date: Tue, 24 Feb 2026 15:12:23 +0100 Subject: [PATCH 01/20] [com8MoTPSA] Squashed branch: MoT-PSA workflow, optimisation, and ana6 Squash of 20 commits from RF_com8MoTPSA branch including: - com8MoTPSA workflow improvements (chunked multiprocessing, path handling) - Bayesian optimisation integration (ana6Optimisation module) - Morris sensitivity analysis scripts - AIMEC runout reference implementation - probAna pickle saving and bounds - Plotting and config improvements --- avaframe/ana4Stats/probAnaCfg.ini | 6 +- avaframe/ana6Optimisation/README_ana6.md | 140 +++ .../ana6Optimisation/optimisationUtils.py | 886 +++++++++++++++++ avaframe/ana6Optimisation/runMorrisSA.py | 97 ++ avaframe/ana6Optimisation/runMorrisSACfg.ini | 46 + avaframe/ana6Optimisation/runOptimisation.py | 152 +++ .../ana6Optimisation/runOptimisationCfg.ini | 60 ++ .../runPlotMorrisConvergence.py | 30 + avaframe/com1DFA/com1DFATools.py | 1 + avaframe/com8MoTPSA/com8MoTPSA.py | 84 +- avaframe/com8MoTPSA/com8MoTPSACfg.ini | 4 + avaframe/in3Utils/cfgUtils.py | 33 +- avaframe/out3Plot/outAna6Plots.py | 891 ++++++++++++++++++ avaframe/runScripts/runPlotAreaRefDiffs.py | 218 +++-- 14 files changed, 2526 insertions(+), 122 deletions(-) create mode 100644 avaframe/ana6Optimisation/README_ana6.md create mode 100644 avaframe/ana6Optimisation/optimisationUtils.py create mode 100644 avaframe/ana6Optimisation/runMorrisSA.py create mode 100644 avaframe/ana6Optimisation/runMorrisSACfg.ini create mode 100644 avaframe/ana6Optimisation/runOptimisation.py create mode 100644 avaframe/ana6Optimisation/runOptimisationCfg.ini create mode 100644 avaframe/ana6Optimisation/runPlotMorrisConvergence.py create mode 100644 avaframe/out3Plot/outAna6Plots.py diff --git a/avaframe/ana4Stats/probAnaCfg.ini b/avaframe/ana4Stats/probAnaCfg.ini index 1859b48b4..bdddd6e8a 100644 --- a/avaframe/ana4Stats/probAnaCfg.ini +++ 
b/avaframe/ana4Stats/probAnaCfg.ini @@ -40,7 +40,7 @@ samplingStrategy = 1 # #++++++VARIATION INFO FOR DRAW SAMPLES FROM FULL SET OF VARIATIONS # type of parameters that shall be varied -separated by | (options: float) varParType = float|float -# factor used to create the number of samples, if morris number of samples depends on number of varied variables and number of trajectories, for now use nSample as number of trajectories +# factor used to create the number of samples, if morris: number of samples depends on number of varied variables and number of trajectories, for now use nSample as number of trajectories, n >=2 for morris nSample = 40 # sample method used to create sample (options: latin, morris) sampleMethod = latin @@ -71,13 +71,13 @@ frictModel = samosAT [com4FlowPy_com4FlowPy_override] -# use default com1DFA config as base configuration (True) and override following parameters +# use default com4FlowPy config as base configuration (True) and override following parameters # if False and local is available use local defaultConfig = True [com8MoTPSA_com8MoTPSA_override] -# use default com1DFA config as base configuration (True) and override following parameters +# use default com8MoTPSA config as base configuration (True) and override following parameters # if False and local is available use local defaultConfig = True diff --git a/avaframe/ana6Optimisation/README_ana6.md b/avaframe/ana6Optimisation/README_ana6.md new file mode 100644 index 000000000..bca9db16c --- /dev/null +++ b/avaframe/ana6Optimisation/README_ana6.md @@ -0,0 +1,140 @@ +# ana6 – Sensitivity Analysis & Optimisation + +The `ana6Optimisation` module provides tools for performing Morris sensitivity analysis and parameter optimisation within the AvaFrame workflow. It supports input parameter ranking, convergence analysis of sensitivity indices and surrogate-based optimisation strategies. 
The module can be used either sequentially (Morris analysis followed by optimisation) or independently for direct optimisation. + +--- + +## Module Structure + +The module contains the following files: + +- `runMorrisSA.py` (configuration: `runMorrisSACfg.ini`) +- `runPlotMorrisConvergence.py` (uses `runMorrisSACfg.ini`) +- `runOptimisation.py` (configuration: `runOptimisationCfg.ini`) +- `optimisationUtils.py` + +--- + +## Workflow +### Reference Data and Working Directory + +All scripts must be executed within the directory: `avaframe/ana6Optimisation` + +In `avaframeCfg.ini`, the avalanche reference directory (`avalancheDir`) must include the prefix `../`, for example: `../data/avaFleisskar` + +This ensures correct relative path resolution within the AvaFrame project structure. + +To compute goodness-of-fit metrics between reference and simulation results and to perform AIMEC analysis, the following reference data must be provided in: `avaframe/data/<avaName>/Inputs` + +The required folder structure is: +Folder: +- **LINES** + Contains the AIMEC path. + +- **POLYGONS** + Contains Cropshape and defines the maximal extent of runout area that is used for calculating areal indicators. + +- **REFDATA** + Defines the runout area of the reference event. + +- **REL** + Defines the release area of the avalanche event. + +File: +- **Digital Elevation Model (DEM)** + Must be placed directly in the `Inputs` directory and must cover the entire affected area. + +More details here: https://docs.avaframe.org/en/latest/moduleCom1DFA.html + +___ + +### Morris Sensitivity Analysis (MorrisSA) + +The Morris sensitivity analysis provides a ranking of input parameters based on their influence on the model response. 
+ +Before running `runMorrisSA.py`, the following step is required first: + +- Execute `runAna4ProbAnaCom8MoTPSA` +- In `probAnaCfg.ini`: + - Set the sampling method to `'morris'` + - Define the number of Morris trajectories (`nSample`) + - Select the input parameters and define their variation bounds + +This step generates the required simulations and stores the sampled parameters and their bounds in a pickle file. + +**Afterwards:** + +- Run `runMorrisSA.py` +- Configure settings via `runMorrisSACfg.ini` +- The `MORRIS_CONVERGENCE` setting can be ignored for standard sensitivity analysis + +**Outputs:** + +- Pickle file containing: + - Ranked input parameters + - Morris sensitivity indices + - Parameter bounds +- Visualisation plots of the sensitivity results + +--- + +### Morris Convergence Analysis + +The convergence analysis evaluates how the Morris sensitivity indices stabilise with increasing numbers of trajectories. Its purpose is to determine the minimum number of trajectories that yields robust results. + +**Requirements:** + +- Run `runAna4ProbAnaCom8MoTPSA` multiple times with different numbers of Morris trajectories +- Rename Output folders afterwards with the following naming convention: OutputsR<r> + + +where `<r>` corresponds to the number of trajectories + +This process is computationally expensive, as it requires a large number of simulations. + +**Execution:** + +- Run `runPlotMorrisConvergence.py` + +**Output:** + +- Convergence plots of Morris sensitivity indices + +--- + +### Optimisation + +The optimisation process identifies the set of input parameters that yields the best agreement between simulation results and a defined reference. "Best" is defined by the objective function implemented in the optimisation routine. 
+ +Optimisation can be performed in two ways: + +**With prior Morris analysis:** + - Parameter ranking is available + - Parameter bounds are already defined + - Execute `runOptimisation.py` with scenario 1 in `runOptimisationCfg.ini` + +**Without prior Morris analysis:** + - Execute `runAna4ProbAnaCom8MoTPSA.py` to generate some initial samples (for surrogate) + - In `probAnaCfg.ini`: + - Set the sampling method to `'latin'` + - Define the number of model runs (`nSample`) + - Select the input parameters and define their variation bounds + - Execute `runOptimisation.py` with scenario 2 in `runOptimisationCfg.ini` + +**Two optimisation strategies are implemented:** + +- Surrogate-based non-sequential optimisation +- Surrogate-based Bayesian (sequential) optimisation + +**Outputs:** + +- Optimal parameter set +- Visualisation plots of the optimisation results and progress + +--- + +## Notes + +- Performing Morris sensitivity analysis before optimisation is recommended to reduce the parameter space. +- Convergence analysis significantly increases computational cost. +- All workflows are controlled via `.ini` configuration files. 
\ No newline at end of file diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py new file mode 100644 index 000000000..f2314bfbb --- /dev/null +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -0,0 +1,886 @@ +import numpy as np +import pathlib +import pickle +import pandas as pd +import configparser +import os +import time +from scipy.stats import norm, qmc +from datetime import datetime +import re + +from sklearn.model_selection import KFold, cross_validate +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import Pipeline +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel + +from avaframe.in3Utils import cfgUtils, initializeProject, logUtils +from avaframe.com8MoTPSA import com8MoTPSA +from avaframe.ana4Stats import probAna +from avaframe.runScripts.runPlotAreaRefDiffs import runPlotAreaRefDiffs +import avaframe.out3Plot.outAna6Plots as saveResults + + +def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC): + """ + Calculate areal indicators between reference polygon and simulation and AIMEC analysis and save data in ana3AIMEC and out1Peak. + + Parameters + ---------- + cfgOpt : configparser.ConfigParser + Global configuration object. Must contain section "GENERAL" + with keys: + - resType + - thresholdValueSimulation + - modName + avalancheDir : str + Directory containing the directory of the reference avalanche + ana3AIMEC : module + AIMEC analysis module providing `fullAimecAnalysis()`. + + """ + # ToDo: to reduce comp. 
cost: run AIMEC and calcArealIndicators only for new simulations + # Areal indicators + resType = cfgOpt["GENERAL"]["resType"] + thresholdValueSimulation = float(cfgOpt["GENERAL"]["thresholdValueSimulation"]) + modName = cfgOpt["GENERAL"]["modName"] + runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName) + + # AIMEC + cfgAIMEC = cfgUtils.getModuleConfig(ana3AIMEC) + rasterTransfo, resAnalysisDF, plotDict, _, pathDict = ana3AIMEC.fullAimecAnalysis(avalancheDir, cfgAIMEC) + + +def readParamSetDF(inDir, varParList): + """ + Read parameter sets from .ini files in a directory and build a DataFrame. + + Parameters + ---------- + inDir : str or pathlib.Path + Path to directory containing .ini files. + varParList : list of str + List of parameter names to extract values from each .ini file. + + Returns + ------- + paramSetDF : pandas.DataFrame + DataFrame with simName, parameterSet and order as columns + """ + + # List to hold all parameters sets + paramSet = [] + order = [] + sampleMethods = [] + filenames = [] + + # Loop over all files in the folder + for filename in os.listdir(inDir): + if filename.endswith('.ini') and 'sourceConfiguration' not in filename: + filepath = os.path.join(inDir, filename) + + # Load the .ini file + config = configparser.ConfigParser() + config.read(filepath) + + if 'VISUALISATION' in config.sections(): + # config is inifile + index = config['VISUALISATION']['scenario'] + + if 'VISUALISATION' in config.sections(): + # config is inifile + index = config['VISUALISATION']['scenario'] + if 'sampleMethod' in config['VISUALISATION']: + sampleMethod = config['VISUALISATION']['sampleMethod'] + else: + sampleMethod = np.nan + + row = [] # row contains 1 row + for param in varParList: + section = probAna.fetchParameterSection(config, param) + value = config[section][param] + value = float(value) + row.append(value) + + order.append(index) + sampleMethods.append(sampleMethod) + paramSet.append(row) # rows contains all rows + 
filenames.append(os.path.splitext(filename)[0]) + + # Convert to pandas DF + paramSetDF = pd.DataFrame({ + 'simName': filenames, + 'parameterSet': paramSet, # [row for row in paramSet], # Wrap each row as a list + 'order': pd.to_numeric(order), # convert to int + 'sampleMethods': sampleMethods + }) + return paramSetDF + + +def readArealIndicators(inDir): + """ + Read areal indicator results from a pickle file and convert to a DataFrame. + + Parameters + ---------- + inDir : str or pathlib.Path + Path to pickle file containing indicator results. + + Returns + ------- + indicatorsDF : pandas.DataFrame + DataFrame with simName and areal indicators, + """ + with open(inDir, "rb") as f: + all_results = pickle.load(f) + + indicatorsDF = pd.DataFrame(all_results) + return indicatorsDF + + +def addLossMetrics(df, referenceDF, cfgOpt): + """ + Compute evaluation metrics (recall, precision, F1, Tversky score) and a combined weighted Loss (or optimisation) + variable from a given DataFrame. + + The metrics are based on area (number of pixel would also be possible). Invalid values + (division by zero) are replaced with 0. + + Parameters + ---------- + df : pandas.DataFrame + Input DataFrame with at least the columns + ``TP_SimRef_area``, ``FP_SimRef_area``, ``FN_SimRef_area``. + referenceDF: pandas.Dataframe + Dataframe with information of the reference in AIMEC e.g. 
reference_sRunout of the polygon + cfgOpt: configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file + + Returns + ------- + df : pandas.DataFrame + Same DataFrame with additional columns: + - ``recall`` : float + - ``precision`` : float + - ``f1_score`` : float + - ``tversky_score`` : float + - ``optimisationVariable`` : float + """ + # Decide if loss function is based on ncells or area + basedOn = '_area' + TP = df[f"TP_SimRef{basedOn}"] + FP = df[f"FP_SimRef{basedOn}"] + FN = df[f"FN_SimRef{basedOn}"] + + # Recall = TP / (TP + FN) + denomRecall = TP + FN + df["recall"] = np.where(denomRecall != 0, TP / denomRecall, 0.0) + # Precision = TP / (TP + FP) + denomPrecision = TP + FP + df["precision"] = np.where(denomPrecision != 0, TP / denomPrecision, 0.0) + # F1 Score + denomF1 = df['precision'] + df['recall'] + df["f1_score"] = np.where(denomF1 != 0, 2.0 * df['precision'] * df['recall'] / denomF1, 0.0) + + # Tversky score = TP / (TP + alpha * FP + beta * FN), gives penalty to overshoot --> alpha + alpha = float(cfgOpt['LOSS_PARAMETERS']['tverskyAlpha']) + beta = float(cfgOpt['LOSS_PARAMETERS']['tverskyBeta']) + + denomTversky = TP + alpha * FP + beta * FN + df["tversky_score"] = np.where(denomTversky != 0, TP / denomTversky, 0.0) + # Subtract 1 to ensure that 0 are good values and 1 bad + tverskyModified = 1 - df["tversky_score"] + + # Runout + # RMSE divided with Runout length of Reference + sRunoutRef = referenceDF['reference_sRunout'].values + runoutRMSE = df['runoutLineDiff_poly_RMSE'] + runoutRMSENormalised = runoutRMSE / sRunoutRef # 0 is good, 1 is bad + + # Weights + wTversky = float(cfgOpt['LOSS_PARAMETERS']['weightTversky']) + wRunout = float(cfgOpt['LOSS_PARAMETERS']['weightRunout']) + + df['optimisationVariable'] = (wRunout * runoutRMSENormalised.fillna(1) + wTversky * tverskyModified.fillna(1)) + return df + + +def buildFinalDF(avalancheDir, varParList, cfgOpt): + """ + Build the final merged DataFrame for a given avalanche. 
+ + Combines parameter sets, AIMEC results, and areal indicators into one DataFrame, + then computes evaluation metrics 'via addLossMetrics'. + + Parameters + ---------- + avalancheDir : str + Path of avalanche directory + varParList: list of str + List of parameter names that are varied + cfgOpt: configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file + + Returns + ------- + finalDF : pandas.DataFrame + Final DataFrame containing: + - ``simName`` + - ``parameterSet`` + - ``order`` + - Areal indicator columns + - Evaluation metrics (recall, precision, f1_score, tversky_score, optimisationVariable) + """ + + avaName = avalancheDir.split('/')[-1] + + # Folder where ini files from simulations are + inDir = pathlib.Path(avalancheDir, 'Outputs/com8MoTPSA/configurationFiles') + # Read parameterSetDF + paramSetDF = readParamSetDF(inDir, varParList) + + # Dataframe from AIMEC + df_aimec = pd.read_csv( + avalancheDir + '/Outputs/ana3AIMEC/com8MoTPSA/Results_' + avaName + '_ppr_lim_1_w_600resAnalysisDF.csv') + # Get data of the reference in AIMEC + referenceDF = pd.read_csv(f"{avalancheDir}/Outputs/ana3AIMEC/com8MoTPSA/referenceDF.csv") + + # Read areal indicators + arealIndicatorDir = pathlib.Path(avalancheDir, 'Outputs', 'out1Peak', 'arealIndicators.pkl') + indicatorsDF = readArealIndicators(arealIndicatorDir) + + # Merge df's + df_merged = pd.merge(paramSetDF, df_aimec, on='simName', how='inner') + df_merged = df_merged.merge(indicatorsDF, on="simName", how="left") + + # Add optimisation variables + finalDF = addLossMetrics(df_merged, referenceDF, cfgOpt) + return finalDF + + +def createDFParameterLoss(df, paramSelected): + """ + Create DataFrames linking selected parameters with the loss function. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame that contains the selected parameters and their values as well as the loss function. + paramSelected : list of str + Subset of parameters to include in the output DataFrames. 
+ + Returns + ------- + paramLossDF : pandas.DataFrame + DataFrame with one column per selected parameter and an additional + ``Loss`` column with the raw values of ``optimisationVariable``. + paramLossDFScaled : pandas.DataFrame + Same as ``paramLossDF`` but with the selected parameters normalised + to the range [0, 1] using min–max scaling. + """ + paramLossDF = df[paramSelected].copy() + paramLossDFScaled = (paramLossDF - paramLossDF.min()) / (paramLossDF.max() - paramLossDF.min()) # normalise + paramLossDF['Loss'] = df[ + 'optimisationVariable'] + paramLossDFScaled['Loss'] = df[ + 'optimisationVariable'] + return paramLossDF, paramLossDFScaled + + +def fitSurrogate(df, cfgOpt): + """ + Prepare data and initialize surrogate models for loss prediction. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame containing input parameters as columns and a + column named 'Loss' as target variable. + cfgOpt: configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file. + + Returns + ------- + X : numpy.ndarray + Feature matrix of shape (n_samples, n_features). + y : numpy.ndarray + Target vector of shape (n_samples,). + gp_pipe : sklearn.pipeline.Pipeline + Pipeline consisting of feature standardization and a + Gaussian Process regressor with Matern kernel. 
+ """ + # Prepare X, y + y_col = 'Loss' + X = df.drop(columns=[y_col]).to_numpy(dtype=float) + y = df[y_col].to_numpy(dtype=float).reshape(-1) + n_features = X.shape[1] + + # GP kernel with Matern-Covariance + matern_nu = float(cfgOpt['OPTIMISATION']['matern_nu']) + kernel = ( + ConstantKernel(1.0, (1e-3, 1e3)) # Output variance tells how strong Y varies + * Matern(length_scale=np.ones(n_features), + length_scale_bounds=(1e-2, 1e2), # in z (variance) space + nu=matern_nu) + + WhiteKernel(noise_level=1e-4, noise_level_bounds=(1e-8, 1e-1)) + ) + gp = GaussianProcessRegressor( + kernel=kernel, + alpha=1e-8, + normalize_y=True, + n_restarts_optimizer=10, + random_state=0, + ) + + # Pipelines (feature scaling + model) + gp_pipe = Pipeline([("x_scaler", StandardScaler()), ("model", gp)]) + return X, y, gp_pipe + + +def KFoldCV(X, y, pipe, cfgOpt, outDir, avaName, pipeName): + """ + Perform k-fold cross-validation for a regression pipeline. + + Parameters + ---------- + X : numpy.ndarray + Feature matrix of shape (n_samples, n_features). + y : numpy.ndarray + Target vector of shape (n_samples,). + pipe : sklearn.pipeline.Pipeline + Regression pipeline to be evaluated. + cfgOpt: configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file. + outDir : pathlib.Path + File path where the generated image will be saved. + avaName : str + Name of the avalanche. Used for naming the output figure. + pipeName : str + Name of the pipeline, used for formatted console output. + + Returns + ------- + scores : dict + Dictionary containing cross-validation results as returned + by sklearn.model_selection.cross_validate. 
+ """ + # Get optType + optType = cfgOpt['OPTIMISATION']['optType'] + # For losses, sklearn uses "neg_*" because higher-is-better internally + rmse_scorer = "neg_root_mean_squared_error" + mae_scorer = "neg_mean_absolute_error" + r2_scorer = "r2" + k = int(cfgOpt['OPTIMISATION']['k']) + cv = KFold(n_splits=k, shuffle=True, random_state=0) + + scores = cross_validate( + pipe, X, y, cv=cv, + scoring={"rmse": rmse_scorer, "mae": mae_scorer, "r2": r2_scorer}, + return_train_score=True, + error_score="raise" # fail fast if something else is wrong + ) + + # NOTE: rmse/mae were returned as NEGATIVE numbers because the higher is better internally + neg_cols = ["test_rmse", "test_mae", "train_rmse", "train_mae"] + for c in neg_cols: + scores[c] = -scores[c] + # get smoothness parameter nu + matern_nu = float(cfgOpt['OPTIMISATION']['matern_nu']) + row = { + "experiment_name": pipeName, + "n_samples": X.shape[0], + "kernel": f"Matern {matern_nu}", + "noise_model": "WhiteKernel", + } + + for split in ("test", "train"): + for m in ("rmse", "mae", "r2"): + arr = scores[f"{split}_{m}"] + row[f"{split} {m} mean"] = arr.mean() + row[f"{split} {m} std"] = arr.std() + + print(f"\n{pipeName}, {k}-fold CV:") + for split in ("test", "train"): + print(f" {split.capitalize()} metrics:") + for m in ("rmse", "mae", "r2"): + arr = scores[f"{split}_{m}"] + print(f" {m.upper():<4}: {arr.mean():.4g} ± {arr.std():.4g}") + + df = pd.DataFrame([row]) + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + base = os.path.join(outDir, f"{avaName}_{k}FoldCV_{optType}") + df.to_csv(base + f'_{date}.csv', mode="a", header=not os.path.exists(base + f'_{date}.csv'), + index=False) # checks if header exists + + # Save results as image + saveResults.saveKFoldCVPrintImage(scores, pipeName, k, base + f'_Matern_{matern_nu}_Kernel_{date}.png') + return scores + + +def optimiseNonSeq(pipe, cfgOpt, paramBounds): + """ + Perform non-sequential surrogate-based optimization using + Latin 
Hypercube sampling. + + Parameters + ---------- + pipe : sklearn.pipeline.Pipeline + Trained surrogate model pipeline used to predict loss + and uncertainty. + cfgOpt : configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file. + paramBounds : dict + Dictionary mapping parameter names to (min, max) bounds. + + Returns + ------- + topNStat : pandas.DataFrame + DataFrame containing statistics of the top N surrogate + candidates with the lowest predicted loss. + """ + paramSelected = list(paramBounds.keys()) + bounds = np.array(list(paramBounds.values()), dtype=float) # shape (d,2) + d = bounds.shape[0] + + # Create LH samples + seed = int(cfgOpt['OPTIMISATION']['seed']) + n_lhs = int(cfgOpt['OPTIMISATION']['n_lhs']) + sampler = qmc.LatinHypercube(d=d, seed=seed) + sample = sampler.random(n=n_lhs) + X0 = qmc.scale(sample, bounds[:, 0], bounds[:, 1]) + + # Prediction of the loss with GP-Model + mu, sigma = pipe.predict(X0, return_std=True) + # Convert X0 to pandas df for analyze function + df_candidates = pd.DataFrame(X0, columns=paramSelected) + n_top_samples = int(cfgOpt['OPTIMISATION']['n_surrogate_top']) + topNStat, _ = analyzeTopCandidates(df_candidates, mu, sigma, paramSelected, N=n_top_samples) + return topNStat + + +def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N=5): + """ + Analyze the top N surrogate candidates. + + Parameters + ---------- + df_candidates : pandas.DataFrame + Candidate parameter sets. + mu : numpy.ndarray + Predicted loss values. + sigma : numpy.ndarray + Predicted uncertainty values. + param_cols : list of str + Parameter column names. + N : int, optional + Number of best candidates to analyze. + + Returns + ------- + stats : dict + Summary statistics for the top N and best candidate. + topNData : pandas.DataFrame + Top N candidates with predicted mu and sigma. 
+ """ + + # Top N points + idx_topN = np.argsort(mu)[:N] + topNData = df_candidates.iloc[idx_topN].copy() + topNData["mu"] = mu[idx_topN] + topNData["sigma"] = sigma[idx_topN] + + mean_params = topNData[param_cols].mean() + std_params = topNData[param_cols].std() + mean_mu = topNData["mu"].mean() + std_mu = topNData["mu"].std() + mean_sigma = topNData["sigma"].mean() + std_sigma = topNData["sigma"].std() + + print(f"\n🔍 Mittelwerte ± Std (Top {N}):") + for p in param_cols: + m, s = mean_params[p], std_params[p] + perc = (s / m * 100) if m != 0 else np.nan + print(f" {p:30s}: {m:.6f} ± {s:.6f} ({perc:.1f}%)") + perc_mu = (std_mu / mean_mu * 100) if mean_mu != 0 else np.nan + perc_sigma = (std_sigma / mean_sigma * 100) if mean_sigma != 0 else np.nan + print(f"📉 mu: {mean_mu:.4f} ± {std_mu:.4f} ({perc_mu:.1f}%)") + print(f"📊 sigma: {mean_sigma:.4f} ± {std_sigma:.4f} ({perc_sigma:.1f}%)") + + # Best single point + idx_best = np.argmin(mu) + best_params = df_candidates.iloc[idx_best].copy() + best_loss = mu[idx_best] + best_sigma = sigma[idx_best] + + print("\n🔍 Best single Parameter combination:") + for p in param_cols: + print(f" {p:30s}: {best_params[p]:.4f}") + print(f"📉 mu: {best_loss:.4f}") + print(f"📊 sigma: {best_sigma:.4f}") + + return { + f"TopNBest": { + "mean_params": mean_params, + "std_params": std_params, + "mean_mu": mean_mu, + "std_mu": std_mu, + "mean_sigma": mean_sigma, + "std_sigma": std_sigma, + }, + "Best": { + "params": best_params, + "mu": best_loss, + "sigma": best_sigma, + } + }, topNData + + +def expectedImprovement(mu, sigma, f_best, xi=0): + """ + Compute the Expected Improvement (EI) acquisition function + for minimization problems. + Formula taken from: + https://ekamperi.github.io/machine%20learning/2021/06/11/acquisition-functions.html + + Parameters + ---------- + mu : numpy.ndarray + Predicted mean values of the surrogate model. + sigma : numpy.ndarray + Predicted standard deviations of the surrogate model. 
+ f_best : float + Best observed objective value so far. + xi : float, optional + Exploration parameter controlling exploitation–exploration + trade-off. + + Returns + ------- + ei : numpy.ndarray + Expected Improvement values for each candidate. + """ + sigma = np.maximum(sigma, 1e-12) # numeric safety + imp = f_best - mu - xi # minimization, that's why the sign is different, xi for finetunig exploitation + + Z = imp / sigma + ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z) + ei[sigma <= 1e-12] = 0.0 # set EI to zero where sigma is 1e-12 + return ei + + +def lowerConfidenceBound(mu, sigma, k=0.0): + """ + Compute the Lower Confidence Bound (LCB) acquisition function. + Formula taken from: + https://ekamperi.github.io/machine%20learning/2021/06/11/acquisition-functions.html + + Parameters + ---------- + mu : numpy.ndarray + Predicted mean values of the surrogate model. + sigma : numpy.ndarray + Predicted standard deviations of the surrogate model. + k : float, optional + Exploration parameter controlling the trade-off between + exploitation and exploration. + + Returns + ------- + lcb : numpy.ndarray + Lower Confidence Bound values for each candidate. + """ + return -mu + k * sigma + + +def EINextPoint(pipe, y, paramBounds, cfgOpt): + """ + Propose the next evaluation point using Expected Improvement (EI) or lower confidence bound (LCB). + + This function generates a Latin Hypercube sample (LHS) of candidate points within the + parameter bounds, evaluates a surrogate model (``pipe``) on those candidates, and + selects the candidate that maximises EI or LCB for a *minimisation* + problem. + + Parameters + ---------- + pipe : sklearn.pipeline.Pipeline + Trained surrogate model pipeline used to predict loss + and uncertainty. + y : array-like + Observed objective values (loss) from previous evaluations. + paramBounds : dict + Dictionary mapping parameter names to (lower, upper) bounds. 
+ cfgOpt : configparser.ConfigParser + Config parser of the runOptimisationCfg.ini file. + + Returns + ------- + xBest : numpy.ndarray + Vector of the values of the selected parameters that maximises EI among the LHS candidates. + xBestDict : dict + Mapping of parameter name to selected value for ``xBest``. + best_ei : float + Maximum EI value among the candidate set. + best_lcb : float + Maximum LCB value among the candidate set (computed for reference/diagnostics). + """ + paramSelected = list(paramBounds.keys()) + bounds = np.array(list(paramBounds.values()), dtype=float) # shape (d,2) + d = bounds.shape[0] + f_best = np.nanmin(y) + + # Create LH samples + seed = int(cfgOpt['OPTIMISATION']['seed']) + n_lhs = int(cfgOpt['OPTIMISATION']['n_lhs']) + sampler = qmc.LatinHypercube(d=d, seed=seed) + sample = sampler.random(n=n_lhs) + X0 = qmc.scale(sample, bounds[:, 0], bounds[:, 1]) + + # Predict with pipe + mu, sigma = pipe.predict(X0, return_std=True) + + print(mu.mean(), mu.max(), sigma.mean(), sigma.max()) + + # EI or LCB for minimization + ei = expectedImprovement(mu, sigma, f_best) + lcb = lowerConfidenceBound(mu, sigma) + + xBest = X0[np.argmax(ei)].copy() + # xBest = X0[np.argmax(lcb)].copy() + xBestDict = {feat: float(val) for feat, val in zip(paramSelected, xBest)} + + return xBest, xBestDict, np.max(ei), np.max(lcb) + + +def runCom8MoTPSA(avalancheDir, xBestDict, cfgMain, i=0, optimisationType=None): + """ + Based on the default runCom8MoTPSA function in com8MoTPSA/runCom8MoTPSA.py file, but overrides parameter values in + the module configuration using the values provided in ``xBestDict``. It also assigns a unique visualisation scenario + identifier and records the sampling method for traceability. + + Parameters + ---------- + avalancheDir: str + Path to avalanche directory. + xBestDict : dict + Mapping of parameter name to selected value for ``xBest``. + cfgMain : configparser.ConfigParser + General configuration for avaframe. 
+ i : int, optional + Counter for identifying the number of iterations. + optimisationType: str, optional + Name of the optimisation type, sequential or non-sequential. + + Returns + --------- + simName: str + Name of the simulation. + """ + # Time the whole routine + startTime = time.time() + + # log file name; leave empty to use default runLog.log + logName = 'runCom8MoTPSA' + + # Start logging + log = logUtils.initiateLogger(avalancheDir, logName) + log.info('MAIN SCRIPT') + log.info('Current avalanche: %s', avalancheDir) + # ---------------- + # Clean input directory(ies) of old work and output files + # If you just created the ``avalancheDir`` this one should be clean but if you + # already did some calculations you might want to clean it:: + initializeProject.cleanSingleAvaDir(avalancheDir, deleteOutput=False) + # Get module config + cfgCom8MoTPSA = cfgUtils.getModuleConfig(com8MoTPSA, toPrint=False) + + # overwrite com8MoTPSACfg with xBest values + for param, val in xBestDict.items(): + # print(param, val) + section = probAna.fetchParameterSection(cfgCom8MoTPSA, param) + cfgCom8MoTPSA[section][param] = str(val) + # give visualisation unique scenario for identifying later + cfgCom8MoTPSA['VISUALISATION']['scenario'] = str(i) + if optimisationType == 'nonSeq': + cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'nonSeq' + else: + cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'EI/LCB' + + # ---------------- + # Run psa + simName = com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgCom8MoTPSA, returnSimName=True) + # Print time needed + endTime = time.time() + log.info('Took %6.1f seconds to calculate.' % (endTime - startTime)) + + return simName + + +def findSimName(finalDF, paramValue, atol=1e-6): + """ + Return the simName in finalDF whose parameter columns match + the given paramValue within a numerical tolerance. + + Parameters + ---------- + finalDF : pandas.DataFrame + Must contain column simName and parameter columns given in paramValue. 
+ paramValue : dict or iterable of (name, value) + Parameter values to match. + atol : float, optional + Absolute tolerance for float comparison (default: 1e-6). + + Returns + ------- + str + The first matching simName. + """ + mask = np.ones(len(finalDF), dtype=bool) + for col, val in dict(paramValue).items(): + s = pd.to_numeric(finalDF[col], errors="coerce") + mask &= np.isclose(s, float(val), atol=atol, rtol=0) + + matches = finalDF.loc[mask, "simName"] + return matches.iloc[0] + + +def loadVariationData(cfgOpt, outDir, avaDir): + """ + Load parameter bounds and selected parameters for optimisation. Two execution modes are supported, controlled via + cfgOpt['PARAM_BOUNDS']['scenario']: + + Scenario 1 (Morris pre-run): + - A Morris sensitivity analysis has already been executed. + - Ranked parameters and their bounds are loaded from + 'sa_parameters_bounds.pkl'. + - The top-N most influential parameters are selected for optimisation. + + Scenario 2 (Manual definition): + - No prior Morris screening. + - Parameter names and corresponding bounds are loaded from a previously saved pickle file generated by + ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is therefore not defined within this function, but is + determined by the configuration specified in ``probAnaCfg.ini``. The ``probAnaCfg.ini`` file contains the + settings used to generate the initial sample set, including parameter ranges and variation rules. + + + Parameters + ---------- + cfgOpt: configparser.ConfigParser + Configuration object containing the section 'PARAM_BOUNDS'. + outDir: pathlib.Path + Directory containing the Morris output file + ('sa_parameters_bounds.pkl'). + avaDir: str + Directory of the avalanche. + + Returns + ------- + paramBounds : dict + Dictionary mapping parameter names to (min, max) tuples. + paramSelected : list + List of selected parameter names used for optimisation. 
+ """ + # Read scenario flag + scenario = int(cfgOpt['PARAM_BOUNDS']['scenario']) + + avaName = avaDir.split('/')[-1] + + # Scenario 1: Morris run prior + if scenario == 1: + # Load SA data and define how much parameters should be optimized (variation bounds included) + SiDFSort = pd.read_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") + topN = int(cfgOpt['PARAM_BOUNDS']['topN']) + paramSelected = list(SiDFSort['Parameter'][:topN]) + paramBounds = dict(zip(paramSelected, SiDFSort['bounds'])) + + # Scenario 2: Morris is not run prior + else: + # Load variation data with bounds from pickle file + inDir2 = pathlib.Path(avaDir, 'Outputs', "ana4Stats") + paramValuesD = pd.read_pickle(inDir2 / "paramValuesD.pickle") + paramSelected = paramValuesD['names'] + paramBounds = { + name: (float(bounds[0]), float(bounds[1])) + for name, bounds in zip(paramValuesD["names"], paramValuesD["bounds"]) + } + return paramBounds, paramSelected + + +def loadMorrisConvergenceData(cfgMorrisSA, avalancheDir, avaName): + """ + Load Morris sensitivity analysis results for convergence plotting. + + Parameters + ---------- + cfgMorrisSA : configparser.ConfigParser + Morris configuration object containing section + 'MORRIS_CONVERGENCE' and 'GENERAL'. + avalancheDir : str or pathlib.Path + Root avalanche directory. + avaName : str + Avalanche name used in filename construction. + + Returns + ------- + SA_dfs : list[pandas.DataFrame] + Loaded Morris result DataFrames in the order defined in the ini file. + r_vals : list[int] + Corresponding number of trajectories (r) extracted from folder names. + outputs : list[str] + Output folder names as defined in the ini file. + outDir: pathlib.Path + Path where results are saved to. + reference_r : int + The number of trajectories used as refernce for ordering the parameters. 
+ """ + + section = cfgMorrisSA["MORRIS_CONVERGENCE"] + + outputs = [] + r_vals = [] + + # Read outputs in ini order + for key, value in section.items(): + if key.lower().startswith("outputs"): + outputs.append(value) + + match = re.search(r"R(\d+)", value) + if match: + r_vals.append(int(match.group(1))) + else: + raise ValueError( + f"Could not extract r-value from folder name '{value}'" + ) + + # Build module path + resultDir = cfgMorrisSA["GENERAL"]["resultDir"] # e.g. Outputs/ana6MorrisSA + moduleDir = pathlib.Path(resultDir).name # e.g. ana6MorrisSA + comModuleName = cfgMorrisSA["GENERAL"]["modName"] + + filename = f"{avaName}_sortedSAResultsWithBounds.pkl" + + # Load DataFrames + selectedOutput = cfgMorrisSA['MORRIS_CONVERGENCE']['referenceOutput'] + + # Get reference trajectory number, import for determining of the order + match = re.search(r"R(\d+)", selectedOutput) + if match: + reference_r = int(match.group(1)) + else: + raise ValueError( + f"Could not extract r-value from referenceOutput '{selectedOutput}'" + ) + + selectedDir = None + SA_dfs = [] + + for out_name in outputs: + outDir = pathlib.Path(avalancheDir, out_name, moduleDir, comModuleName) + if out_name == selectedOutput: + selectedDir = outDir + with open(outDir / filename, "rb") as fi: + SA_dfs.append(pickle.load(fi)) + + return SA_dfs, r_vals, outputs, selectedDir, reference_r diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py new file mode 100644 index 000000000..13b030fc8 --- /dev/null +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -0,0 +1,97 @@ +""" +Run script for the Morris sensitivity analysis. For usage read README_ana6.md. 
+""" +import sys +import numpy as np +import pandas as pd +import pathlib +from datetime import datetime +from SALib.analyze import morris as morris_analyze + +from avaframe.in3Utils import cfgUtils +from avaframe.in3Utils import fileHandlerUtils as fU +from avaframe.ana3AIMEC import ana3AIMEC +import avaframe.out3Plot.outAna6Plots as saveResults +import optimisationUtils + +# Get module config +module = sys.modules[__name__] +cfgMorrisSA = cfgUtils.getModuleConfig(module, toPrint=False) + +# Load avalanche directory from general configuration file +cfgMain = cfgUtils.getGeneralConfig() +avalancheDir = cfgMain['MAIN']['avalancheDir'] +avaName = pathlib.Path(avalancheDir).name + +# Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak +optimisationUtils.calcArealIndicatorsAndAimec(cfgMorrisSA, avalancheDir, ana3AIMEC) + +# Load variation data with bounds from pickle file +inDir = pathlib.Path(avalancheDir, 'Outputs', "ana4Stats") +paramValuesD = pd.read_pickle(inDir / "paramValuesD.pickle") +varParList = paramValuesD['names'] +# +# Read and merge results from parameter sets (simulation data), areal indicators and AIMEC +finalDF = optimisationUtils.buildFinalDF(avalancheDir, varParList, cfgMorrisSA) + +# Use only morris samples +morrisDF = finalDF[finalDF['sampleMethods'] == 'morris'].copy(deep=True) +# Set order as index +morrisDF.set_index('order', inplace=True) +# Order df based on order (which is the index) +morrisDF.sort_index(inplace=True) + +# Define input for SA +problem = { + 'num_vars': len(varParList), + 'names': varParList, + 'bounds': paramValuesD['bounds'] +} +samples = np.vstack(morrisDF['parameterSet'].values).astype(float) +Y = morrisDF['optimisationVariable'].values + +# Perform SA +Si = morris_analyze.analyze( + problem, + samples, + Y, + conf_level=float(cfgMorrisSA['MORRIS SA']['conf_level']), + num_levels=int(cfgMorrisSA['MORRIS SA']['num_levels']) +) + +# Rank Parameters +SiData = { + "Parameter": 
Si['names'], + "mu_star": Si['mu_star'], + "sigma": Si['sigma'], + "mu_star_conf": Si['mu_star_conf']} + +# Convert to dataframe +SiDF = pd.DataFrame(SiData) + +# Create folder for saving the results of the analysis, if not already existing +resultDir = cfgMorrisSA['GENERAL']['resultDir'] +comModuleName = cfgMorrisSA['GENERAL']['modName'] +outDir = pathlib.Path(avalancheDir, resultDir, comModuleName) +fU.makeADir(outDir) + +saveResults.barplotSA(SiDF, avaName, outDir) +saveResults.scatterplotSA(SiDF, avaName, outDir) +saveResults.scatterplotUncertaintySA(SiDF, avaName, outDir) + +# Sort SA results +SiDFSort = SiDF.sort_values("mu_star", ascending=False).reset_index(drop=True) +# Append bounds to SiDFSort +paramBounds = dict(zip(problem["names"], problem["bounds"])) +SiDFSort["bounds"] = SiDFSort["Parameter"].map(paramBounds) +# Save as Pickle for Optimization +SiDFSort.to_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") + +# Create df with parameters and the loss function for summary statistics +paramLossDF, paramLossScaledDF = optimisationUtils.createDFParameterLoss(morrisDF, SiDFSort['Parameter']) +N = int(cfgMorrisSA['MORRIS SA']['N']) +paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:N] +# Save mean values of best input parameters as csv +date = datetime.now().strftime("%Y%m%d") +csvPath = f"{outDir}/{avaName}_MorrisBEST{N}Simulations_{date}.csv" +paramLossSubsetDF.describe().to_csv(csvPath) diff --git a/avaframe/ana6Optimisation/runMorrisSACfg.ini b/avaframe/ana6Optimisation/runMorrisSACfg.ini new file mode 100644 index 000000000..eb94c9b68 --- /dev/null +++ b/avaframe/ana6Optimisation/runMorrisSACfg.ini @@ -0,0 +1,46 @@ +### Config File - This file contains the main settings the morris SA + +# Sidenote: (1) when running runMorrisSA.py the working directory needs to be in the ana6Optimisation folder +# (2) avaDirectory in avaframeCfg.ini need ../ as prefix + +# SA ... 
sensitivity analysis + + +[GENERAL] +# USER input for running and plotting a comparison of simulation result to reference polygon +resType = ppr +thresholdValueSimulation = 1 +modName = com8MoTPSA +# in this folder the results will be saved, avalancheDir + this folder +resultDir = Outputs/ana6MorrisSA + +[LOSS_PARAMETERS] +# alpha gives penalty if sim. overshoot the ref. (FP) +# beta gives penalty if sim. comes short compared to the ref. (FN) +tverskyAlpha = 2 +tverskyBeta = 1 +# Loss function is kombination of TverskyScore * weightTversky + RunoutNormalised * weight runout +weightTversky = 0.25 +weightRunout = 0.75 + +[MORRIS SA] +# confidence level and number of levels used for SA +conf_level = 0.95 +num_levels = 6 +# number of best morris samples for statistics +N = 10 + + +[MORRIS_CONVERGENCE] +# for morris convergence plot, morris needs to be run with different morris trajectories prior +# define Output folder where results of morris analyses with different trajectories (R) are saved, naming: OutputsRNumber +Outputs1 = OutputsR5 +Outputs2 = OutputsR10 +Outputs3 = OutputsR20 +Outputs4 = OutputsR40 + +# get order of parameters from this output, also save result of convergence into child folders of this parent folder +referenceOutput = OutputsR10 + + + diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py new file mode 100644 index 000000000..b0ec5266d --- /dev/null +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -0,0 +1,152 @@ +""" +Run script for the optimization process. For usage read README_ana6.md. 
+""" +import sys +import pathlib +import pickle + +from avaframe.in3Utils import cfgUtils +from avaframe.in3Utils import fileHandlerUtils as fU +from avaframe.ana3AIMEC import ana3AIMEC +import avaframe.out3Plot.outAna6Plots as saveResults +import optimisationUtils + +# Get module config +module = sys.modules[__name__] +cfgOpt = cfgUtils.getModuleConfig(module, toPrint=False) + +# Load avalanche directory from general configuration file +cfgMain = cfgUtils.getGeneralConfig() +avalancheDir = cfgMain['MAIN']['avalancheDir'] +avaName = pathlib.Path(avalancheDir).name + +# Create folder for saving the results of the analysis, if not already existing +resultDirOpt = cfgOpt['GENERAL']['resultDir'] +comModuleName = cfgOpt['GENERAL']['modName'] +outDir = pathlib.Path(avalancheDir, resultDirOpt, comModuleName) +fU.makeADir(outDir) + +# Get config from morris for path to morris results +cfgDir = 'runMorrisSA.ini' +cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) +resultDirMorris = cfgMorrisSA['GENERAL']['resultDir'] +inDir = pathlib.Path(avalancheDir, resultDirMorris, comModuleName) + +# Load variation parameters and their bounds +paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, inDir, avalancheDir) + +# Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak +optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + +# Read and merge results from parameter sets (simulation data), areal indicators and AIMEC +finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + +# ---------------------------------------------------------------------------------------------------------------------- +optimisationType = cfgOpt['OPTIMISATION']['optType'] +if optimisationType == 'nonseq': + csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_NonSeq.csv" + # Save sim with currently best y + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, 
csv_path=csv_path) + + # Create df with most important parameters and the loss function + emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) + + # Create surrogate + X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF + + # K fold cross validation + optimisationUtils.KFoldCV(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") + + # Fit final pipline + gp_pipe.fit(X, y) + + # Optimize non-sequential (only use pipe once to find best param) + topNStat = optimisationUtils.optimiseNonSeq(gp_pipe, cfgOpt, paramBounds) + + # Run com8 with mean parameters of best N surrogate evaluations + simNameMean = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['TopNBest']['mean_params'], cfgMain, + optimisationType='nonSeq') + # Run com8 with parameters of best surrogate evaluations + simNameBest = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['Best']['params'], cfgMain, + optimisationType='nonSeq') + # SimName could be None if sim is already available, if so get the name from finalDF + if simNameMean is None: + simNameMean = optimisationUtils.findSimName(finalDF, topNStat["TopNBest"]["mean_params"], atol=1e-6) + if simNameBest is None: + simNameBest = optimisationUtils.findSimName(finalDF, topNStat["Best"]["params"], atol=1e-6) + + # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak + optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + + # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC + finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + + # Create image of table + saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, topNStat, + out_path=outDir / f"{avaName}_StatisticsBestParameterValues_NonSeqAnalysis.png", + title=f"{avaName}, NonSeq-Analysis: Best Surrogate vs Best Model Run", + 
simNameMean=simNameMean, simNameBest=simNameBest) + + # Save latest real sim + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, simName=simNameMean, csv_path=csv_path) + +# ---------------------------------------------------------------------------------------------------------------------- +elif optimisationType == 'seq': + csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_BO.csv" + # Save sim with currently best y + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) + + eiThreshold = float(cfgOpt['OPTIMISATION']['eiThreshold']) + nGoodSims = float(cfgOpt['OPTIMISATION']['numberOfGoodSimulations']) + countGoodSims = 0 + bo_max_iterations = int(cfgOpt['OPTIMISATION']['bo_max_iterations']) + + for i in range(bo_max_iterations): + # Create df with most important parameters and the loss function + emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) + # Train surrogate + X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF + + # K fold cross validation + optimisationUtils.KFoldCV(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") + + # Fit final pipline + gp_pipe.fit(X, y) + + # Get next input parameters with EI + xBest, xBestDict, ei, lcb = optimisationUtils.EINextPoint(gp_pipe, y, paramBounds, cfgOpt) + + # Run com8 with best x + simName = optimisationUtils.runCom8MoTPSA(avalancheDir, xBestDict, cfgMain, i, optimisationType='seq') + + # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak + optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + + # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC + finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + + # Save latest sim + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, ei, lcb, simName, + csv_path=csv_path) 
+ # If ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is + # reached, optimization stops + if ei < eiThreshold: + countGoodSims = countGoodSims + 1 + if countGoodSims >= nGoodSims: + break + + saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, + out_path=outDir / f"{avaName}_StatisticsBestParameterValues_BOAnalysis.png", + title=f"{avaName}, Seq-Analysis: Best Model Runs") + + # Save BO plots + n_top_samples = int(cfgOpt['OPTIMISATION']['n_model_top']) + emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) + + saveResults.BOConvergencePlot(finalDF, avaName, outDir) + saveResults.BOBoxplot(emulatorDF, avaName, outDir, N=n_top_samples) + saveResults.BOBoxplotNormalised(emulatorDF, paramBounds, avaName, outDir, N=n_top_samples) + +# Save pickle file of finalDF +with open(outDir / f"{avaName}_finalDF.pickle", "wb") as fi: + pickle.dump(finalDF, fi) diff --git a/avaframe/ana6Optimisation/runOptimisationCfg.ini b/avaframe/ana6Optimisation/runOptimisationCfg.ini new file mode 100644 index 000000000..be8a3dbd4 --- /dev/null +++ b/avaframe/ana6Optimisation/runOptimisationCfg.ini @@ -0,0 +1,60 @@ +### Config File - This file contains the main settings for the optimisation process + +# Sidenote: (1) when running runOptimisation.py the working directory needs to be in the ana6Optimisation folder +# (2) avaDirectory in avaframeCfg.ini need ../ as prefix + +# BO ... bayesian optimisation +# ei ... 
expected improvement + + +[GENERAL] +# USER input for running and plotting a comparison of simulation result to reference polygon +resType = ppr +thresholdValueSimulation = 1 +modName = com8MoTPSA +# in this folder the results will be saved, avalancheDir + this folder +resultDir = Outputs/ana6Optimisation + + +[PARAM_BOUNDS] +# 2 scenarios: choose 1 or 2 +scenario = 2 +#(1): morris is run prior, then dataframe of ranked input parameters is already saved by runMorris.py as pickle file, user only need to determine how much input paramters to use for optimisation +topN = 6 +# (2): morris is not run prior, input parameters and their bounds are read from a previously saved pickle file generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is determined by the configuration specified in ``probAnaCfg.ini``. + + +[LOSS_PARAMETERS] +# alpha gives penalty if sim. overshoot the ref. (FP) +# beta gives penalty if sim. comes short compared to the ref. (FN) +tverskyAlpha = 2 +tverskyBeta = 1 + +weightTversky = 0.25 +weightRunout = 0.75 + + +[OPTIMISATION] +# Type of optimisation: seq or nonseq +optType = seq +# define the smoothness of the matern kernel of the surrogate, e.g.: 0.5, 1.5, 2.5 +matern_nu = 2.5 +# define k for k-fold cross validation +k = 5 +# sampling seed for generation of Latin Hypercube samples used for loss-prediction with surrogate +seed = 12345 +# number of LH samples +n_lhs = 1000000 +# determine the number N of surrogate's best simulations used for statistics +n_surrogate_top = 100 +# determine the number N of models best simulations used for statistics (n < available model runs) +n_model_top = 6 + +# settings for seq: +# if ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is reached, optimisation stops +eiThreshold = 0.01 +numberOfGoodSimulations = 5 +bo_max_iterations = 2 + + + diff --git a/avaframe/ana6Optimisation/runPlotMorrisConvergence.py 
b/avaframe/ana6Optimisation/runPlotMorrisConvergence.py new file mode 100644 index 000000000..eca869a52 --- /dev/null +++ b/avaframe/ana6Optimisation/runPlotMorrisConvergence.py @@ -0,0 +1,30 @@ +""" +Run script for the Morris convergence plot. For usage read README_ana6.md. +""" +import pathlib + +import avaframe.out3Plot.outAna6Plots as saveResults +from avaframe.in3Utils import cfgUtils +import optimisationUtils + +# Load avalanche directory from general configuration file +cfgMain = cfgUtils.getGeneralConfig() +avalancheDir = cfgMain['MAIN']['avalancheDir'] +avaName = pathlib.Path(avalancheDir).name + +# Load morris config file +cfgDir = 'runMorrisSA.ini' +cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) + +# Load Morris sensitivity analysis results for convergence plot +SA_dfs, r_vals, outputs, outDir, reference_r = optimisationUtils.loadMorrisConvergenceData(cfgMorrisSA, avalancheDir, + avaName) + +# Top 7 parameters +saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=7, + outpath=outDir / f'{avaName}_MorrisSAConvergencePlotTop7.png', + title=f"{avaName} Convergence plot for top 7 parameters") +# All parameters +saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=None, + outpath=outDir / f'{avaName}_MorrisSAConvergencePlotAll.png', + title=f"{avaName} Convergence plot for all parameters") diff --git a/avaframe/com1DFA/com1DFATools.py b/avaframe/com1DFA/com1DFATools.py index 5ba11a87a..aba363f48 100644 --- a/avaframe/com1DFA/com1DFATools.py +++ b/avaframe/com1DFA/com1DFATools.py @@ -346,6 +346,7 @@ def initializeInputs(avalancheDir, cleanRemeshedRasters, module=com1DFA): simDFExisting, simNameExisting = cfgUtils.readConfigurationInfoFromDone( avalancheDir, specDir="", + modName=modName ) # fetch input data - dem, release-, entrainment- and resistance areas (and secondary release areas) diff --git a/avaframe/com8MoTPSA/com8MoTPSA.py b/avaframe/com8MoTPSA/com8MoTPSA.py index 033e7d4ec..679636ca5 100644 --- 
a/avaframe/com8MoTPSA/com8MoTPSA.py +++ b/avaframe/com8MoTPSA/com8MoTPSA.py @@ -18,12 +18,13 @@ import avaframe.in3Utils.fileHandlerUtils as fU from avaframe.out1Peak import outPlotAllPeak as oP import avaframe.in3Utils.MoTUtils as mT +from avaframe.in3Utils.initializeProject import _checkForFolderAndDelete # create local logger log = logging.getLogger(__name__) -def com8MoTPSAMain(cfgMain, cfgInfo=None): +def com8MoTPSAMain(cfgMain, cfgInfo=None, returnSimName=None): """Run the full MoT-PSA workflow: generate configs, run simulations in parallel, postprocess. Parameters @@ -32,6 +33,8 @@ def com8MoTPSAMain(cfgMain, cfgInfo=None): main AvaFrame configuration (avalancheDir, nCPU, plot flags) cfgInfo : dict or None, optional override configuration info passed to MoTGenerateConfigs + returnSimName : any, optional + if not None, return the first simDict key after running """ # Get all necessary information from the configuration files currentModule = sys.modules[__name__] @@ -53,22 +56,56 @@ def com8MoTPSAMain(cfgMain, cfgInfo=None): log.info("--- STARTING (potential) PARALLEL PART ----") - # Get number of CPU Cores wanted - nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFiles)) - - # Create parallel pool and run - # with multiprocessing.Pool(processes=nCPU) as pool: - with Pool(processes=nCPU) as pool: - results = pool.map(com8MoTPSATask, rcfFiles) - pool.close() - pool.join() - - timeNeeded = "%.2f" % (time.time() - startTime) - log.info("Overall (parallel) com8MoTPSA computation took: %s s " % timeNeeded) - log.info("--- ENDING (potential) PARALLEL PART ----") + # Split into chunks to postprocess and clean up work dirs incrementally + chunkSize = 8 + if len(rcfFiles) > chunkSize: + for i in range(0, len(rcfFiles), chunkSize): + rcfFilesChunk = rcfFiles[i:i + chunkSize] + simNamesChunk = [p.stem for p in rcfFilesChunk] + + nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFilesChunk)) + + if bool(simNamesChunk): + with Pool(processes=nCPU) as pool: + results 
= pool.map(com8MoTPSATask, rcfFilesChunk) + pool.close() + pool.join() + + timeNeeded = "%.2f" % (time.time() - startTime) + log.info("Overall (parallel) com8MoTPSA computation took: %s s " % timeNeeded) + log.info("--- ENDING (potential) PARALLEL PART ----") + + # Postprocess the simulations + com8MoTPSAPostprocess(simNamesChunk, cfgMain, inputSimFiles) + + # Delete folder in Work directory after postprocessing to reduce memory costs + avaDir = cfgMain["MAIN"]["avalancheDir"] + for sim in simNamesChunk: + folderName = "Work/com8MoTPSA/" + sim + _checkForFolderAndDelete(avaDir, folderName) + else: + log.warning("There is no simulation to be performed for releaseScenario") + else: + nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFiles)) + + simNames = [p.stem for p in rcfFiles] + if bool(simNames): + with Pool(processes=nCPU) as pool: + results = pool.map(com8MoTPSATask, rcfFiles) + pool.close() + pool.join() + + timeNeeded = "%.2f" % (time.time() - startTime) + log.info("Overall (parallel) com8MoTPSA computation took: %s s " % timeNeeded) + log.info("--- ENDING (potential) PARALLEL PART ----") + + # Postprocess the simulations + com8MoTPSAPostprocess(simNames, cfgMain, inputSimFiles) + else: + log.warning("There is no simulation to be performed for releaseScenario") - # Postprocess the simulations - com8MoTPSAPostprocess(simDict, cfgMain, inputSimFiles) + if returnSimName is not None and simDict: + return next(iter(simDict)) def copyRawToLayerPeakFiles(workDir, outputDirPeakFile): @@ -106,7 +143,7 @@ def copyRawToLayerPeakFiles(workDir, outputDirPeakFile): shutil.copy2(source, target) -def com8MoTPSAPostprocess(simDict, cfgMain, inputSimFiles): +def com8MoTPSAPostprocess(simNames, cfgMain, inputSimFiles): """Postprocess MoT-PSA results: rename outputs to L1/L2 peak files, generate plots and reports. 
For each simulation, copies DataTime.txt and renames raw MoT-PSA output files @@ -114,8 +151,8 @@ def com8MoTPSAPostprocess(simDict, cfgMain, inputSimFiles): Parameters ---------- - simDict : dict - simulation dictionary + simNames : list + list of simulation name strings cfgMain : configparser.ConfigParser main AvaFrame configuration (avalancheDir, plot flags) inputSimFiles : dict @@ -128,7 +165,7 @@ def com8MoTPSAPostprocess(simDict, cfgMain, inputSimFiles): outputDirPeakFile = pathlib.Path(avalancheDir) / "Outputs" / "com8MoTPSA" / "peakFiles" fU.makeADir(outputDirPeakFile) - for key in simDict: + for key in simNames: workDir = pathlib.Path(avalancheDir) / "Work" / "com8MoTPSA" / str(key) # Copy DataTime.txt @@ -137,6 +174,13 @@ def com8MoTPSAPostprocess(simDict, cfgMain, inputSimFiles): copyRawToLayerPeakFiles(workDir, outputDirPeakFile) + # Write config indicator files to track completed simulations + configFileName = "%s.ini" % key + for saveDir in ["configurationFilesDone", "configurationFilesLatest"]: + configDir = pathlib.Path(avalancheDir, "Outputs", "com8MoTPSA", "configurationFiles", saveDir) + with open((configDir / configFileName), "w") as fi: + fi.write("see directory configurationFiles for info on config") + # create plots and report modName = __name__.split(".")[-1] reportDir = pathlib.Path(avalancheDir, "Outputs", modName, "reports") diff --git a/avaframe/com8MoTPSA/com8MoTPSACfg.ini b/avaframe/com8MoTPSA/com8MoTPSACfg.ini index 6c5a76d08..cf0a65075 100644 --- a/avaframe/com8MoTPSA/com8MoTPSACfg.ini +++ b/avaframe/com8MoTPSA/com8MoTPSACfg.ini @@ -222,3 +222,7 @@ Initial CFL number (-) = 0.8 # peak files and plots are exported, option to turn off exports when exportData is set to False # this affects export of peak files and also generation of peak file plots exportData = True + +[VISUALISATION] +# scenario name - can be used for plotting +scenario = diff --git a/avaframe/in3Utils/cfgUtils.py b/avaframe/in3Utils/cfgUtils.py index 
dafd133ba..0f4ab5aac 100644 --- a/avaframe/in3Utils/cfgUtils.py +++ b/avaframe/in3Utils/cfgUtils.py @@ -958,7 +958,7 @@ def setStrnanToNan(simDF, simDFTest, name): return simDF -def readConfigurationInfoFromDone(avaDir, specDir="", latest=False): +def readConfigurationInfoFromDone(avaDir, specDir="", latest=False, modName=''): """Check avaName/Outputs/com1DFA/configurationFilesDone and pass names of all files found in this directory and create corresponding simDF this is useful if e.g. no allConfigurations.csv has @@ -976,6 +976,8 @@ def readConfigurationInfoFromDone(avaDir, specDir="", latest=False): path to a directory where simulation configuration files directory called configurationFiles can be found - optional latest: bool if True check for files found in avaName/Outputs/com1DFA/configurationFilesLatest + modName: str + name of the module to be used for task (optional) Returns -------- @@ -989,7 +991,10 @@ def readConfigurationInfoFromDone(avaDir, specDir="", latest=False): if specDir != "": inDir = pathlib.Path(specDir, "configurationFiles") else: - inDir = pathlib.Path(avaDir, "Outputs", "com1DFA", "configurationFiles") + if modName == 'com8MoTPSA': + inDir = pathlib.Path(avaDir, "Outputs", "com8MoTPSA", "configurationFiles") + else: + inDir = pathlib.Path(avaDir, "Outputs", "com1DFA", "configurationFiles") # search inDir/configurationFilesDone or inDir/configurationFilesLatest (depending on latest flag) for already existing sims if latest: @@ -1007,13 +1012,23 @@ def readConfigurationInfoFromDone(avaDir, specDir="", latest=False): simDF = None else: # create simDF (dataFrame with one row per simulation of configuration files found in configDir) - simDF = createConfigurationInfo( - avaDir, - comModule="com1DFA", - standardCfg="", - writeCSV=False, - specDir=specDir, - simNameList=simNameExisting, + if modName == 'com8MoTPSA': + simDF = createConfigurationInfo( + avaDir, + comModule="com8MoTPSA", + standardCfg="", + writeCSV=False, + specDir=specDir, + 
simNameList=simNameExisting, + ) + else: + simDF = createConfigurationInfo( + avaDir, + comModule="com1DFA", + standardCfg="", + writeCSV=False, + specDir=specDir, + simNameList=simNameExisting, ) # check for allConfigurationsInfo to find computation info and add to info fetched from ini files diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py new file mode 100644 index 000000000..82e105d96 --- /dev/null +++ b/avaframe/out3Plot/outAna6Plots.py @@ -0,0 +1,891 @@ +import numpy as np +import pandas as pd +import pathlib +import matplotlib.pyplot as plt +from adjustText import adjust_text +from datetime import datetime + + +def barplotSA(SiDF, avaName, outDir): + """ + Create a bar plot of Morris sensitivity results. + + Bars show μ* as percentage of the total sensitivity, with bar width + proportional to σ. A vertical dashed line is drawn where the cumulative μ* + first reaches 80%. + + Parameters + ---------- + SiDF : pandas.DataFrame + DataFrame with at least the following columns: + - ``Parameter`` : str, parameter names + - ``mu_star`` : float, mean absolute elementary effect + - ``mu_star_conf`` : float, optional, confidence interval of μ* + - ``sigma`` : float, standard deviation of elementary effects + avaName : str + Avalanche name, used in the saved filename. + outDir : str or pathlib.Path + Directory where the plot is saved. 
+ """ + + # 1) Sort by mu_star (descending) + df = SiDF.sort_values("mu_star", ascending=False).reset_index(drop=True) + + # 2) Normalize μ* so the percentages sum to 100 + total_mu = df["mu_star"].sum() + mu_pct = 100 * df["mu_star"] / total_mu + + # scale the confidence interval to the same percentage units + if "mu_star_conf" in df: + mu_pct_conf = 100 * df["mu_star_conf"] / total_mu + else: + mu_pct_conf = None + + # 3) Map σ to bar widths + wmin, wmax = 0.3, 0.9 + rng = df["sigma"].max() - df["sigma"].min() + sigma_norm = (df["sigma"] - df["sigma"].min()) / (rng if rng else 1) + bar_widths = wmin + (wmax - wmin) * sigma_norm + + # 4) Plot + x = np.arange(len(df)) + fig, ax = plt.subplots(figsize=(12, 6)) + bars = ax.bar( + x, mu_pct, width=bar_widths, edgecolor="black", + capsize=3 if mu_pct_conf is not None else 0 + ) + + ax.set_xticks(x, df["Parameter"], rotation=60, ha="right") + ax.set_ylabel("μ* (% of total)") + ax.set_title("Sensitivity: μ* (percent of total) with σ mapped to bar width") + + # Show value labels + for xi, yi in zip(x, mu_pct): + ax.text(xi, yi, f"{yi:.1f}%", ha="center", va="bottom", fontsize=9) + + # 7) Save figure + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_paramRanking_{date}.png" + plt.savefig(figName, dpi=300, bbox_inches="tight") + + +def scatterplotSA(SiDF, avaName, outDir): + """ + Create a scatter plot of Morris sensitivity results (μ* vs σ). + + Parameters + ---------- + SiDF : pandas.DataFrame + DataFrame with at least the following columns: + - ``Parameter`` : str, parameter names + - ``mu_star`` : float, mean absolute elementary effect + - ``sigma`` : float, standard deviation of elementary effects + avaName : str + Avalanche name, used in the saved filename. + outDir : str or pathlib.Path + Directory where the plot is saved. 
+ """ + + # Scatter Plot + plt.figure(figsize=(12, 7)) + plt.scatter(SiDF['mu_star'], SiDF['sigma'], color='blue') + + # Annotate name to the points + for i, txt in enumerate(SiDF['Parameter']): + plt.annotate(txt, (SiDF['mu_star'][i], SiDF['sigma'][i]), fontsize=9, xytext=(5, 5), textcoords='offset points') + + # Label and title + plt.xlabel('mu_star (Einflussstärke)', fontsize=12) + plt.ylabel('sigma (Nichtlinearität / Interaktionen)', fontsize=12) + plt.title('Morris Sensitivitätsanalyse: mu_star vs sigma', fontsize=14) + plt.grid(True) + + # Save figure + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_Scatterplot_{date}.png" + plt.savefig(figName, dpi=300, bbox_inches="tight") + + +def scatterplotUncertaintySA(SiDF, avaName, outDir): + """ + Create a scatter plot of Morris sensitivity results with uncertainty. + Plots μ* vs σ with horizontal error bars given by ``mu_star_conf``. + + Parameters + ---------- + SiDF : pandas.DataFrame + DataFrame with at least the following columns: + - ``Parameter`` : str, parameter names + - ``mu_star`` : float, mean absolute elementary effect + - ``mu_star_conf`` : float, confidence interval of μ* + - ``sigma`` : float, standard deviation of elementary effects + avaName : str + Avalanche name, used in the saved filename. + outDir : str or pathlib.Path + Directory where the plot is saved. 
+ """ + # Plot with error bars + plt.figure(figsize=(12, 7)) + plt.errorbar( + SiDF['mu_star'], SiDF['sigma'], + xerr=SiDF['mu_star_conf'], + fmt='o', color='blue', ecolor='gray', elinewidth=1.5, capsize=4 + ) + + # Annotations with adjustText + texts = [plt.text(SiDF['mu_star'][i], SiDF['sigma'][i], SiDF['Parameter'][i], fontsize=9) for i in range(len(SiDF))] + adjust_text(texts, arrowprops=dict(arrowstyle='->', color='gray', lw=0.5)) + + # Axes and layout + plt.xlabel('mu_star (Einflussstärke)', fontsize=12) + plt.ylabel('sigma (Nichtlinearität / Interaktionen)', fontsize=12) + plt.title('Morris Sensitivitätsanalyse: mu_star vs sigma (mit Unsicherheit)', fontsize=14) + + # Save figure + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_ScatterplotUncertainty_{date}.png" + plt.savefig(figName, dpi=300, bbox_inches="tight") + + +def BOConvergencePlot(finalDF, avaName, outDir): + """ + This function visualises the evolution of the optimisation variable + (loss) across different sampling phases: + + 1. Latin hypercube sampling + 2. Bayesian optimisation (EI/LCB) + 3. Optional non-sequential surrogate-based sampling + + Parameters + ---------- + finalDF : pandas.DataFrame + DataFrame containing simulation results. + + avaName : str + Name of the avalanche. Used for naming the output figure. + + outDir : pathlib.Path + Directory where the convergence plot will be saved. 
+ + """ + # Color palette + c_best = '#4e79a7' + c_bo = '#59a14f' + c_lhs = '#76b7b2' + c_nonseq = '#e15759' + + df = finalDF.dropna( + subset=["sampleMethods", "order", "optimisationVariable"] + ).copy() + + # Split by sampling method + latin = df[df["sampleMethods"] == "latin"].sort_values("order") + bo = df[df["sampleMethods"] == "EI/LCB"].sort_values("order") + nonseq = df[df["sampleMethods"] == "nonSeq"].sort_values("order") + + # Iteration axis + current_offset = 0 + + if not latin.empty: + latin["iter"] = latin["order"] + current_offset = latin["iter"].max() + 1 + + if not bo.empty: + bo["iter"] = bo["order"] + current_offset + current_offset = bo["iter"].max() + 1 + + if not nonseq.empty: + nonseq = nonseq.sort_index().copy() + nonseq["iter"] = np.arange(current_offset, current_offset + len(nonseq)) + current_offset = nonseq["iter"].max() + 1 + + # Combine for best-so-far + all_parts = [latin, bo] + if not nonseq.empty: + all_parts.append(nonseq) + + all_df = pd.concat(all_parts).sort_values("iter") + all_df["best_so_far"] = all_df["optimisationVariable"].cummin() + + # Plot + fig, ax = plt.subplots(figsize=(9, 5)) + + if not latin.empty: + ax.scatter( + latin["iter"], + latin["optimisationVariable"], + s=35, + alpha=0.7, + color=c_lhs, + label="Latin hypercube" + ) + + if not bo.empty: + ax.scatter( + bo["iter"], + bo["optimisationVariable"], + s=40, + alpha=0.85, + color=c_bo, + label="Bayesian optimisation (EI/LCB)" + ) + + if not nonseq.empty: + ax.scatter( + nonseq["iter"], + nonseq["optimisationVariable"], + s=40, + alpha=0.85, + color=c_nonseq, + label="Non-sequential sampling" + ) + + ax.plot( + all_df["iter"], + all_df["best_so_far"], + linewidth=1.5, + color=c_best, + label="Best-so-far" + ) + + # Add separator lines between sampling phases (if applicable) + if not latin.empty: + ax.axvline(latin["iter"].max() + 0.5, linestyle="--", linewidth=1, color="black") + if not bo.empty: + ax.axvline(bo["iter"].max() + 0.5, linestyle="--", linewidth=1, 
color="black") + + ax.set_xlabel("Iteration") + ax.set_ylabel("Optimisation variable (loss)") + ax.set_title("Convergence: Latin hypercube → Bayesian optimisation") + ax.legend(frameon=False, loc='best') + + fig.tight_layout() + + # Save figure + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_BOConvergence_{date}.png" + fig.savefig(figName, dpi=300, bbox_inches="tight") + plt.close(fig) + + +def BOBoxplot(paramLossDF, avaName, outDir, N=10): + """ + Create boxplots of the top-N parameter sets based on loss. + + This function selects the N best-performing simulations (lowest loss) + and visualises the distribution of their parameter values using boxplots. + Each parameter is plotted in a separate subplot. + + Parameters + ---------- + paramLossDF : pandas.DataFrame + DataFrame containing model parameters and corresponding loss values. + + avaName : str + Name of the avalanche. Used for naming the output figure. + + outDir : pathlib.Path + Directory where the boxplot figure will be saved. + + N : int, optional + Number of best-performing simulations (lowest loss) to include. + Default is 10. + """ + df_best = paramLossDF.nsmallest(N, "Loss") + param_cols = paramLossDF.columns.drop('Loss') + + fig, axes = plt.subplots(2, 4, figsize=(10, 6)) + axes = axes.flatten() + for ax, col in zip(axes, param_cols): + ax.boxplot(df_best[col]) + ax.set_xticklabels([col]) + + # If less than 8 parameters, remove empty axes + for ax in axes[len(param_cols):]: + ax.axis("off") + plt.tight_layout() + + # Save figure + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_BOBoxplot_{date}.png" + plt.savefig(figName, dpi=300, bbox_inches="tight") + + +def BOBoxplotNormalised(paramLossDF, paramBounds, avaName, outDir, N=10): + """ + Create normalized boxplots of parameters for the top-N simulations. 
+ + This function selects the N best-performing simulations (lowest loss) + and visualises the distribution of their parameter values after + min–max normalization based on predefined parameter bounds. + + Parameters + ---------- + paramLossDF : pandas.DataFrame + DataFrame containing model parameters and corresponding loss values. + + paramBounds : dict + Dictionary mapping parameter names to their (min, max) bounds: + {parameter_name: (lower_bound, upper_bound)} + + Bounds are used for min–max normalization. + + avaName : str + Name of the avalanche. Used for naming the output figure. + + outDir : pathlib.Path + Directory where the normalized boxplot figure will be saved. + + N : int, optional + Number of best-performing simulations (lowest loss) to include. + Default is 10. + """ + df_best = paramLossDF.nsmallest(N, "Loss") + param_cols = paramLossDF.columns.drop('Loss') + + # normalize using parameter bounds + data = [ + (df_best[c] - paramBounds[c][0]) / (paramBounds[c][1] - paramBounds[c][0]) + for c in param_cols + ] + + fig, ax = plt.subplots(figsize=(15, 7)) + ax.boxplot(data) + ax.set_xticks(range(1, len(param_cols) + 1)) + ax.set_xticklabels(param_cols, rotation=45, ha="right", fontsize=14) + ax.set_ylabel("Normalized value (min–max)", fontsize=14) + ax.set_title(f"Normalized parameter distributions (best {N})", fontsize=16) + ax.tick_params(axis="y", labelsize=12) + fig.tight_layout() + + # Save figure + # Include date, format: YYYYMMDD + date = datetime.now().strftime("%Y%m%d") + figName = f"{outDir}/{avaName}_BOBoxplotNormalised_{date}.png" + plt.savefig(figName, dpi=300, bbox_inches="tight") + + +def saveKFoldCVPrintImage(scores, pipeName, k, out_path): + """ + Save a summary of K-fold cross-validation results as an image. The output image contains metrics for both + training and test sets. 
+
+    Reported metrics per split:
+        - RMSE (Root Mean Squared Error)
+        - MAE (Mean Absolute Error)
+        - R² (Coefficient of determination)
+
+    Parameters
+    ----------
+    scores : dict
+        Dictionary containing cross-validation results. Expected keys:
+        - "train_rmse", "train_mae", "train_r2"
+        - "test_rmse", "test_mae", "test_r2"
+
+    pipeName : str
+        Name of the model or pipeline. Included in the figure header.
+
+    k : int
+        Number of folds used in cross-validation.
+
+    out_path : str or pathlib.Path
+        File path where the generated image will be saved.
+    """
+    lines = [f"{pipeName}, {k}-fold CV:\n"]
+    for split in ("test", "train"):
+        lines.append(f"{split.capitalize()} metrics:")
+        for m in ("rmse", "mae", "r2"):
+            arr = scores[f"{split}_{m}"]
+            lines.append(f" {m.upper():<4}: {arr.mean():.4g} ± {arr.std():.4g}")
+        lines.append("")
+
+    text = "\n".join(lines)
+
+    fig = plt.figure(figsize=(6, 4))
+    plt.axis("off")
+    plt.text(0.01, 0.99, text, va="top", family="monospace")
+
+    plt.savefig(out_path, dpi=300, bbox_inches="tight")
+    plt.close(fig)
+
+
+def saveBestorCurrentModelrun(finalDF, paramSelected, ei=None, lcb=None, simName=None, csv_path='dummy.csv'):
+    """
+    Save either the best model run (based on optimisationVariable) or a
+    specified simulation (simName) to a CSV file.
+
+    Parameters
+    ----------
+    finalDF : pandas.DataFrame
+        containing all simulation results and optimization metrics.
+    paramSelected : list of str
+        List of parameter names that should be included in the exported output.
+    ei : float, optional
+        Expected Improvement value (used in Bayesian optimization).
+        If None, the column will be created with None.
+    lcb : float, optional
+        Lower Confidence Bound value (used in Bayesian optimization).
+        If None, the column will be created with None.
+    simName : str, optional
+        If provided, the row corresponding to this simulation name is saved.
+        If None, the row with the minimal optimisationVariable is saved.
+ csv_path : str or pathlib.Path, optional + Path to the CSV file. + + Notes + ----- + Only a subset of relevant columns (including selected parameters) + is written to the output file. + """ + # Subset df, save only important entries + cols_keep = ['simName', 'sampleMethods', 'order', 'Simulation time (s)', 'Minimum time step (s)', + 'Initial CFL number (-)', 'TP_SimRef_cells', 'TP_SimRef_area', 'FP_SimRef_cells', 'FP_SimRef_area', + 'FN_SimRef_cells', 'FN_SimRef_area', 'recall', 'precision', 'f1_score', 'tversky_score', + 'optimisationVariable'] + columns_keep = cols_keep[:6] + [p for p in paramSelected if p not in cols_keep] + cols_keep[6:] + df = finalDF[columns_keep] + + if simName is not None: + row = df.loc[df['simName'] == simName].copy() + else: + idx = df['optimisationVariable'].idxmin() + row = df.loc[[idx]].copy() + + # Ensure the optional columns always exist + if ei is not None: + row["ei"] = ei + else: + row['ei'] = None + if lcb is not None: + row["lcb"] = lcb + else: + row['lcb'] = None + # Write to csv + path = pathlib.Path(csv_path) + row.to_csv(path, mode="a", index=False, header=not path.exists()) + + +def saveTopCandidates(finalDF, paramSelected, cfgOpt, results_dict=None, out_path="analysisTable.png", title=None, + simNameMean=None, simNameBest=None): + """ + Create result table(s) for surrogate/model top candidates and save as PNG and CSV. + + Behavior + -------- + - If `results_dict` is provided: creates up to two tables + 1) Surrogate summary (TopNBest mean/std + surrogate single best in "best" column) + Optionally appends a row "optimisationVariable" where: + - "mean" is filled from `simNameMean` (lookup in finalDF) + - "best" is filled from `simNameBest` (lookup in finalDF) + 2) Model summary (Top N + single best), where N is read from cfgOpt['OPTIMISATION']['n_model_top'] + - If `results_dict` is None: creates only the model table (2). 
+ + Outputs + ------- + - PNG at `out_path` + - CSV next to the PNG with suffix `_tables.csv` (tidy format with a `table` column) + + Parameters + ---------- + finalDF : pandas.DataFrame + Must contain column "optimisationVariable". + If simNameMean/simNameBest is used, must contain column "simName". + Must contain parameter columns listed in `paramSelected`. + paramSelected : list[str] + Parameter column names to summarize for the model table. + cfgOpt : configparser.ConfigParser-like + Needs: + - cfgOpt['OPTIMISATION']['n_surrogate_top'] + - cfgOpt['OPTIMISATION']['n_model_top'] + results_dict : dict | None, optional + If provided, expected keys: + - "TopNBest": dict with "mean_params", "std_params", "mean_mu", "std_mu", "mean_sigma", "std_sigma" + - "Best": dict with "params", "mu", "sigma" + out_path : str | pathlib.Path, optional + Path to save PNG figure. + title : str | None, optional + Optional global title for the PNG figure. + simNameMean : str | None, optional + Simulation name whose model optimisationVariable is placed into the surrogate table row + "optimisationVariable" under the "mean" column. + simNameBest : str | None, optional + Simulation name whose model optimisationVariable is placed into the surrogate table row + "optimisationVariable" under the "best" column. + + Returns + ------- + pathlib.Path + Path to the saved PNG figure. 
+ """ + tables = [] + + # ========================================================== + # Surrogate table (optional) + # ========================================================== + if results_dict is not None: + top = results_dict["TopNBest"] + + # --- normalize mean/std params to Series --- + mean_params = top["mean_params"] + std_params = top["std_params"] + + # If params are list/tuple of (name,value) pairs, convert to dict first + if not isinstance(mean_params, dict): + mean_params = dict(mean_params) + if not isinstance(std_params, dict): + std_params = dict(std_params) + + df_top = pd.DataFrame( + { + "mean": pd.Series(mean_params, dtype="float64"), + "std": pd.Series(std_params, dtype="float64"), + } + ) + df_top["relStd [%]"] = relstd(df_top["std"], df_top["mean"]) + + extra = pd.DataFrame( + { + "mean": [top["mean_mu"], top["mean_sigma"]], + "std": [top["std_mu"], top["std_sigma"]], + }, + index=["optimisationVariable_Surrogate", "sigma"], + ) + extra["relStd [%]"] = relstd(extra["std"], extra["mean"]) + + df_top = pd.concat([df_top, extra], axis=0) + + # --- create/attach "best" column from surrogate Best --- + best = results_dict["Best"] + best_params = best["params"] + if not isinstance(best_params, dict): + best_params = dict(best_params) + + best_series = pd.Series(best_params, dtype="float64") + best_series.loc["optimisationVariable_Surrogate"] = float(best["mu"]) + best_series.loc["sigma"] = float(best["sigma"]) + df_top["best"] = best_series + + # --- add model optimisationVariable row: mean and/or best --- + def _get_optvar_for_sim(sim_name, label): + if "simName" not in finalDF.columns: + raise ValueError(f"{label} set but finalDF has no 'simName' column.") + sel = finalDF.loc[finalDF["simName"] == sim_name, "optimisationVariable"] + if sel.empty: + raise ValueError(f"{label}='{sim_name}' not found in finalDF['simName'].") + return float(pd.to_numeric(sel.iloc[0], errors="coerce")) + + if simNameMean is not None or simNameBest is not None: + # 
Ensure row exists + if "optimisationVariable" not in df_top.index: + df_top.loc["optimisationVariable", ["mean", "std", "relStd [%]", "best"]] = [np.nan, np.nan, np.nan, + np.nan] + + if simNameMean is not None: + df_top.loc["optimisationVariable", ["mean", "std", "relStd [%]"]] = [ + _get_optvar_for_sim(simNameMean, "simNameMean"), + np.nan, + np.nan, + ] + if simNameBest is not None: + df_top.loc["optimisationVariable", "best"] = _get_optvar_for_sim(simNameBest, "simNameBest") + + n_surrogate_top = int(cfgOpt["OPTIMISATION"]["n_surrogate_top"]) + mean_tag = f" ({simNameMean})" if simNameMean else "" + best_tag = f" ({simNameBest})" if simNameBest else "" + + t1 = ( + f"Surrogate: Mean of Top {n_surrogate_top} Best{mean_tag} " + f"+ Single Best{best_tag}" + ) + tables.append((fmt_df(df_top), t1)) + + # ========================================================== + # Model table (always) + # ========================================================== + if "optimisationVariable" not in finalDF.columns: + raise ValueError("finalDF must contain column 'optimisationVariable'.") + + best_idx = finalDF["optimisationVariable"].idxmin() + + n_model_top = int(cfgOpt["OPTIMISATION"]["n_model_top"]) + topN = finalDF.nsmallest(n_model_top, "optimisationVariable") + + df_topN = summary_table(topN, paramSelected, best_row=finalDF.loc[best_idx]) + + opt_mean = pd.to_numeric(topN["optimisationVariable"], errors="coerce").mean() + opt_std = pd.to_numeric(topN["optimisationVariable"], errors="coerce").std() + + df_topN.loc["optimisationVariable", ["mean", "std", "relStd [%]", "best"]] = [ + opt_mean, + opt_std, + relstd(opt_std, opt_mean), + pd.to_numeric(finalDF.loc[best_idx, "optimisationVariable"], errors="coerce"), + ] + + best_sim = finalDF.at[best_idx, "simName"] if "simName" in finalDF.columns else "" + t2 = f"Model: Mean of Top {n_model_top} Best + Single Best{f' ({best_sim})' if best_sim else ''}" + tables.append((fmt_df(df_topN), t2)) + + # 
========================================================== + # Plot + # ========================================================== + out_path = pathlib.Path(out_path) + out_path.parent.mkdir(parents=True, exist_ok=True) + + fig_h = 1.2 + 0.38 * sum(len(df) for df, _ in tables) + fig, axes = plt.subplots(len(tables), 1, figsize=(10, fig_h)) + axes = [axes] if len(tables) == 1 else axes + + if title: + fig.suptitle(title, fontsize=14, y=0.99) + + for ax, (df_disp, t) in zip(axes, tables): + ax.axis("off") + tbl = ax.table( + cellText=df_disp.values, + rowLabels=df_disp.index.tolist(), + colLabels=df_disp.columns.tolist(), + loc="center", + ) + tbl.auto_set_font_size(False) + tbl.set_fontsize(9) + tbl.scale(1, 1.2) + ax.set_title(t, fontsize=12, pad=10) + + plt.tight_layout(rect=(0, 0, 1, 0.98)) + fig.savefig(out_path, dpi=200, bbox_inches="tight") + plt.close(fig) + + # ========================================================== + # CSV export (tidy) + # ========================================================== + csv_path = out_path.with_name(f"{out_path.stem}_tables.csv") + frames = [] + for df_disp, t in tables: + tmp = df_disp.copy().reset_index(names=["row"]) + tmp.insert(0, "table", t) + frames.append(tmp) + pd.concat(frames, ignore_index=True).to_csv(csv_path, index=False) + + return out_path + + +def formatSig(x): + """ + Format numbers for display in tables (significant digits, sci notation for small values). + - NaN or non-numeric values → "" + - 0 → "0" + - |x| < 1e-3 → scientific notation with 2 decimal places + - |x| < 100 → 2 significant digits + - |x| ≥ 100 → rounded integer + + Parameters + ---------- + x : Any + Value to be formatted. If conversion to float fails or the value + is NaN, an empty string is returned. + + Returns + ------- + str + Formatted string representation suitable for compact table output. 
+ """ + # --- Number formatting --- + try: + x = float(x) + except (TypeError, ValueError): + return "" + if pd.isna(x): + return "" + if x == 0.0: + return "0" + if abs(x) < 1e-3: + return f"{x:.2e}" # small numbers in scientific notation + elif abs(x) < 100: + return f"{x:.2g}" # 2 significant digits + else: + return str(int(round(x))) + + +def fmt_df(df): + """ + Apply formatSig to common summary columns for prettier display/export. + + Parameters + ---------- + df : pandas.DataFrame + Input DataFrame containing summary statistics. + + Returns + ------- + pandas.DataFrame + A copy of the input DataFrame with selected columns formatted + as strings for display purposes. + + """ + g = df.copy() + for c in ["mean", "std", "relStd [%]", "best"]: + if c in g.columns: + g[c] = g[c].map(formatSig) + return g + + +def relstd(std, mean): + """ + Relative std in percent; returns NaN if mean is 0 or NaN. + + Parameters + ---------- + std : array-like or scalar + Standard deviation values. + + mean : array-like or scalar + Mean values corresponding to `std`. + + Returns + ------- + numpy.ndarray + Relative standard deviation in percent. Returns NaN where: + - mean is 0 + - mean is NaN + - conversion to numeric fails + """ + mean = pd.to_numeric(mean, errors="coerce") + std = pd.to_numeric(std, errors="coerce") + return np.where((mean == 0) | pd.isna(mean), np.nan, std / mean * 100.0) + + +def summary_table(df, cols, best_row=None): + """ + Compute summary statistics (mean, standard deviation, relative standard deviation, + and optional best values) for selected numeric columns. + + The function converts the specified columns to numeric (coercing errors to NaN), + computes column-wise statistics, and returns them in a compact summary table. + + Parameters + ---------- + df : pandas.DataFrame + Input DataFrame containing the data to summarise. + + cols : list-like + List of column names for which summary statistics should be computed. 
+ + best_row : pandas.Series or dict-like, optional + Row containing reference or "best" parameter values. If provided, + the values corresponding to `cols` are included in the output under + the column "best". If None, the "best" column is filled with NaN. + + Returns + ------- + pandas.DataFrame + DataFrame indexed by `cols` containing: + - "mean" : Column-wise mean + - "std" : Column-wise standard deviation + - "relStd [%]" : Relative standard deviation in percent + - "best" : Reference/best values (if provided) + """ + X = df[cols].apply(pd.to_numeric, errors="coerce") + out = pd.DataFrame({"mean": X.mean(), "std": X.std()}) + out["relStd [%]"] = relstd(out["std"], out["mean"]) + if best_row is not None: + out["best"] = pd.to_numeric(best_row[cols], errors="coerce").values + else: + out["best"] = np.nan + return out + + +def plotMorrisConvergence(SA_dfs, r_values, reference_r, k=None, outpath=None, title=None): + """ + Plot convergence of Morris mu_star sensitivities. + + Parameters + ---------- + SA_dfs : list of pandas.DataFrame + Morris result DataFrames. + Must contain columns: 'mu_star', 'Parameter' + r_values : list[int] + Number of trajectories corresponding to SA_dfs. + reference_r : int + Trajectory count r that defines the reference dataset (parameter ranking/order). + k : int or None, optional + Number of top parameters to plot. If None, all parameters are plotted. + outpath : str or pathlib.Path, optional + If given, figure is saved to this path. + title : str, optional + Plot title. + Notes + -------- + - This code was written with AI. 
+ + """ + + # -------------------------------------------------- + # Normalize (in-place safe) + ALIGN BY PARAMETER NAME + # -------------------------------------------------- + SA_dfs_aligned = [] + for df in SA_dfs: + d = df.copy() + if "pct" not in d.columns: + d["pct"] = d["mu_star"] / d["mu_star"].sum() * 100 + # Key fix: align all dataframes by the unique parameter name + d = d.set_index("Parameter") + SA_dfs_aligned.append(d) + + if reference_r not in r_values: + raise ValueError(f"reference_r={reference_r} not found in r_values") + + ref_idx = r_values.index(reference_r) + SA_ref = SA_dfs_aligned[ref_idx] + + # -------------------------------------------------- + # Select parameters (REFERENCE DEFINES ORDER) + # -------------------------------------------------- + if k is None: + top_params = SA_ref.index.tolist() + else: + top_params = SA_ref["pct"].nlargest(k).index.tolist() + + labels = top_params # already parameter names + + # -------------------------------------------------- + # Stack results (REINDEX EACH DF TO REFERENCE ORDER) + # -------------------------------------------------- + Y = np.column_stack([ + d.reindex(top_params)["pct"].to_numpy() + for d in SA_dfs_aligned + ]) + + # -------------------------------------------------- + # Plot + # -------------------------------------------------- + fig, ax = plt.subplots(figsize=(12, 8)) + ax.grid(True, ls="--", lw=0.8, color="gray", alpha=0.6, zorder=0) + + for label, y in zip(labels, Y): + ax.plot( + r_values, + y, + marker="o", + lw=1.5, + label=label + ) + + ax.set_xticks(r_values, [f"r = {r}" for r in r_values]) + ax.set_xlabel("Number of Morris trajectories (r)", fontsize=16) + ax.set_ylabel("Mu* sensitivity (%)", fontsize=16) + + if title is None: + title = "Morris sensitivity convergence" + ax.set_title(title, fontsize=18) + + ax.tick_params(axis="both", labelsize=14) + ax.legend(fontsize=11) + fig.tight_layout() + + if outpath is not None: + fig.savefig(outpath, dpi=300, bbox_inches="tight") 
diff --git a/avaframe/runScripts/runPlotAreaRefDiffs.py b/avaframe/runScripts/runPlotAreaRefDiffs.py index 9afb82ebf..707fcc464 100644 --- a/avaframe/runScripts/runPlotAreaRefDiffs.py +++ b/avaframe/runScripts/runPlotAreaRefDiffs.py @@ -6,6 +6,8 @@ # importing general python modules import pathlib import numpy as np +import pickle +import re # Local imports from avaframe.in3Utils import cfgUtils @@ -19,100 +21,62 @@ import avaframe.com1DFA.DFAtools as DFAtls -################USER Input############# -resType = "ppr" -thresholdValueSimulation = 0.9 -modName = "com1DFA" -############################################################ - -# Load avalanche directory from general configuration file -cfgMain = cfgUtils.getGeneralConfig() -avalancheDir = cfgMain["MAIN"]["avalancheDir"] -outDir = pathlib.Path(avalancheDir, "Outputs", "out1Peak") -fU.makeADir(outDir) - -# Start logging -logName = "plotAreaDiff_%s" % (resType) -# Start logging -log = logUtils.initiateLogger(avalancheDir, logName) -log.info("MAIN SCRIPT") -log.info("Current avalanche: %s", avalancheDir) - - -# initialize DEM from avalancheDir (used to perform simulations) -# TODO: if meshCellSize was changed - use actual simulation DEM -dem = gI.readDEM(avalancheDir) -# get normal vector of the grid mesh -dem = gT.getNormalMesh(dem, num=1) -# get real Area -dem = DFAtls.getAreaMesh(dem, 1) -dem["originalHeader"] = dem["header"] - -# read reference data set -inDir = pathlib.Path(avalancheDir, "Inputs") -referenceFile, availableFile, _ = gI.getAndCheckInputFiles( - inDir, "REFDATA", "POLY", fileExt="shp", fileSuffix="POLY" -) -# convert polygon to raster with value 1 inside polygon and 0 outside the polygon -referenceLine = shpConv.readLine(referenceFile, "reference", dem) -referenceLine = gT.prepareArea(referenceLine, dem, np.sqrt(2), combine=True, checkOverlap=False) - -# if available zoom into area provided by crop shp file in Inputs/CROPSHAPE -cropFile, cropInfo, _ = gI.getAndCheckInputFiles( - inDir, 
"POLYGONS", "cropFile", fileExt="shp", fileSuffix="_cropshape" -) -if cropInfo: - cropLine = shpConv.readLine(cropFile, "cropFile", dem) - cropLine = gT.prepareArea(cropLine, dem, np.sqrt(2), combine=True, checkOverlap=False) - -if modName in ["com1DFA", "com5SnowSlide", "com6RockAvalanche", "com8MoTPSA", "com9MoTVoellmy"]: - # load dataFrame for all configurations of simulations in avalancheDir - simDF = cfgUtils.createConfigurationInfo(avalancheDir) - # create data frame that lists all available simulations and path to their result type result files - inputsDF, resTypeList = fU.makeSimFromResDF(avalancheDir, "com1DFA") - # merge parameters as columns to dataDF for matching simNames - dataDF = inputsDF.merge(simDF, left_on="simName", right_on="simName") - - ## loop over all simulations and load desired resType - for index, row in dataDF.iterrows(): - simFile = row[resType] - simData = IOf.readRaster(simFile) - - # compute referenceMask and simulationMask and true positive, false positive and false neg. 
arrays - # here thresholdValueReference is set to 0.9 as when converting the polygon to a raster, - # values inside polygon are set to 1 and outside to 0 - refMask, compMask, indicatorDict = oPD.computeAreaDiff( - referenceLine["rasterData"], - simData["rasterData"], - 0.9, - thresholdValueSimulation, - dem, - cropToArea=cropLine["rasterData"], +def runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName): + # Load avalanche directory from general configuration file + cfgMain = cfgUtils.getGeneralConfig() + avalancheDir = cfgMain["MAIN"]["avalancheDir"] + outDir = pathlib.Path(avalancheDir, "Outputs", "out1Peak") + fU.makeADir(outDir) + + # Start logging + logName = "plotAreaDiff_%s" % (resType) + # Start logging + log = logUtils.initiateLogger(avalancheDir, logName) + log.info("MAIN SCRIPT") + log.info("Current avalanche: %s", avalancheDir) + + # initialize DEM from avalancheDir (used to perform simulations) + # TODO: if meshCellSize was changed - use actual simulation DEM + dem = gI.readDEM(avalancheDir) + # get normal vector of the grid mesh + dem = gT.getNormalMesh(dem, num=1) + # get real Area + dem = DFAtls.getAreaMesh(dem, 1) + dem["originalHeader"] = dem["header"] + + # read reference data set + inDir = pathlib.Path(avalancheDir, "Inputs") + referenceFile, availableFile, _ = gI.getAndCheckInputFiles( + inDir, "REFDATA", "POLY", fileExt="shp", fileSuffix="POLY" ) + # convert polygon to raster with value 1 inside polygon and 0 outside the polygon + referenceLine = shpConv.readLine(referenceFile, "reference", dem) + referenceLine = gT.prepareArea(referenceLine, dem, np.sqrt(2), combine=True, checkOverlap=False) - # plot differences - oPD.plotAreaDiff( - referenceLine["rasterData"], - refMask, - simData["rasterData"], - compMask, - resType, - simData["header"], - thresholdValueSimulation, - outDir, - indicatorDict, - row["simName"], - cropFile=cropFile, + # if available zoom into area provided by crop shp file in Inputs/CROPSHAPE + cropFile, cropInfo, _ 
= gI.getAndCheckInputFiles( + inDir, "POLYGONS", "cropFile", fileExt="shp", fileSuffix="_cropshape" ) -else: - # load all result files - resultDir = pathlib.Path(avalancheDir, "Outputs", modName, "peakFiles") - peakFilesList = list(resultDir.glob("*_%s.tif" % resType)) + list(resultDir.glob("*_%s.asc" % resType)) - for pF in peakFilesList: - simData = IOf.readRaster(pF) - simName = pF.stem + if cropInfo: + cropLine = shpConv.readLine(cropFile, "cropFile", dem) + cropLine = gT.prepareArea(cropLine, dem, np.sqrt(2), combine=True, checkOverlap=False) + + if modName in ["com1DFA", "com5SnowSlide", "com6RockAvalanche", "com8MoTPSA", "com9MoTVoellmy"]: + # load dataFrame for all configurations of simulations in avalancheDir + simDF = cfgUtils.createConfigurationInfo(avalancheDir) + # create data frame that lists all available simulations and path to their result type result files + inputsDF, resTypeList = fU.makeSimFromResDF(avalancheDir, "com1DFA") + # merge parameters as columns to dataDF for matching simNames + dataDF = inputsDF.merge(simDF, left_on="simName", right_on="simName") + + ## loop over all simulations and load desired resType + for index, row in dataDF.iterrows(): + simFile = row[resType] + simData = IOf.readRaster(simFile) # compute referenceMask and simulationMask and true positive, false positive and false neg. 
arrays + # here thresholdValueReference is set to 0.9 as when converting the polygon to a raster, + # values inside polygon are set to 1 and outside to 0 refMask, compMask, indicatorDict = oPD.computeAreaDiff( referenceLine["rasterData"], simData["rasterData"], @@ -133,6 +97,80 @@ thresholdValueSimulation, outDir, indicatorDict, - simName, + row["simName"], cropFile=cropFile, ) + else: + # load all result files + resultDir = pathlib.Path(avalancheDir, "Outputs", modName, "peakFiles") + peakFilesList = list(resultDir.glob("*_%s.tif" % resType)) + list(resultDir.glob("*_%s.asc" % resType)) + + allResults = [] + + for pF in peakFilesList: + simData = IOf.readRaster(pF) + simName = pF.stem + + # Only process PSA simulations + if "psa" not in simName.lower(): + continue + + # compute referenceMask and simulationMask and true positive, false positive and false neg. arrays + refMask, compMask, indicatorDict = oPD.computeAreaDiff( + referenceLine["rasterData"], + simData["rasterData"], + 0.9, + thresholdValueSimulation, + dem, + cropToArea=cropLine["rasterData"], + ) + + # plot differences + oPD.plotAreaDiff( + referenceLine["rasterData"], + refMask, + simData["rasterData"], + compMask, + resType, + simData["header"], + thresholdValueSimulation, + outDir, + indicatorDict, + simName, + cropFile=cropFile, + ) + + allResults.append({ + "sim_name": simName, + "res_type": resType, + "threshold": thresholdValueSimulation, + "indicator_dict": indicatorDict, + }) + + # Save summary of TP/FP/FN indicators as pickle + rows = [] + for entry in allResults: + cleanName = re.sub(r"_ppr$", "", entry["sim_name"]) + indicators = entry["indicator_dict"] + row = {"simName": cleanName} + + for key, subdict in indicators.items(): + shortKey = key.replace("truePositive", "TP_SimRef") \ + .replace("falsePositive", "FP_SimRef") \ + .replace("falseNegative", "FN_SimRef") + row["%s_cells" % shortKey] = subdict.get("nCells", None) + row["%s_area" % shortKey] = subdict.get("areaSum", None) + + 
rows.append(row) + + with open(outDir / "arealIndicators.pkl", "wb") as f: + pickle.dump(rows, f) + + +if __name__ == "__main__": + ################USER Input############# + resType = "ppr" + thresholdValueSimulation = 1 + modName = "com8MoTPSA" + + runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName) From e41ff1cd1e307685a4aa3a1460a6650b650d2c65 Mon Sep 17 00:00:00 2001 From: Felix Oesterle <6945681+fso42@users.noreply.github.com> Date: Thu, 26 Feb 2026 08:22:13 +0100 Subject: [PATCH 02/20] refactor(runScripts): `runPlotAreaRefDiffs`,removing unused conditions and checks --- avaframe/runScripts/runPlotAreaRefDiffs.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/avaframe/runScripts/runPlotAreaRefDiffs.py b/avaframe/runScripts/runPlotAreaRefDiffs.py index 707fcc464..57bbb703f 100644 --- a/avaframe/runScripts/runPlotAreaRefDiffs.py +++ b/avaframe/runScripts/runPlotAreaRefDiffs.py @@ -61,7 +61,7 @@ def runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName): cropLine = shpConv.readLine(cropFile, "cropFile", dem) cropLine = gT.prepareArea(cropLine, dem, np.sqrt(2), combine=True, checkOverlap=False) - if modName in ["com1DFA", "com5SnowSlide", "com6RockAvalanche", "com8MoTPSA", "com9MoTVoellmy"]: + if modName in ["com1DFA", "com5SnowSlide", "com6RockAvalanche"]: # load dataFrame for all configurations of simulations in avalancheDir simDF = cfgUtils.createConfigurationInfo(avalancheDir) # create data frame that lists all available simulations and path to their result type result files @@ -111,10 +111,6 @@ def runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName): simData = IOf.readRaster(pF) simName = pF.stem - # Only process PSA simulations - if "psa" not in simName.lower(): - continue - # compute referenceMask and simulationMask and true positive, false positive and false neg. 
arrays refMask, compMask, indicatorDict = oPD.computeAreaDiff( referenceLine["rasterData"], From a7f0a5e5e36db69196831493c3e6c7e8ed51bbe8 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 26 Feb 2026 09:43:21 +0100 Subject: [PATCH 03/20] fix(probAna): restore missing bounds and config writing after merge - Add bounds to paramValuesD in createSamplesWithVariation (StandardParameters) - Add writing of visualisation scenario and sampling method to com8MoTPSACfg.ini --- avaframe/ana4Stats/probAna.py | 63 ++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/avaframe/ana4Stats/probAna.py b/avaframe/ana4Stats/probAna.py index 18afc1dee..2108fb7d3 100644 --- a/avaframe/ana4Stats/probAna.py +++ b/avaframe/ana4Stats/probAna.py @@ -24,7 +24,6 @@ from avaframe.out3Plot import statsPlots as sP from avaframe.in1Data import getInput as gI - # create local logger # change log level in calling module to DEBUG to see log messages log = logging.getLogger(__name__) @@ -203,17 +202,17 @@ def updateCfgRange(cfg, cfgProb, varName, varDict): if valVariation == "": valVariation = "-" parValue = ( - variationType - + "$" - + valSteps - + "$" - + valVariation - + "$" - + cfgDist["GENERAL"]["minMaxInterval"] - + "$" - + cfgDist["GENERAL"]["buildType"] - + "$" - + cfgDist["GENERAL"]["support"] + variationType + + "$" + + valSteps + + "$" + + valVariation + + "$" + + cfgDist["GENERAL"]["minMaxInterval"] + + "$" + + cfgDist["GENERAL"]["buildType"] + + "$" + + cfgDist["GENERAL"]["support"] ) # if variation using percent elif variationType.lower() == "percent": @@ -225,9 +224,9 @@ def updateCfgRange(cfg, cfgProb, varName, varDict): parValue = valVariation + "$" + valSteps if "ci" in valVariation: message = ( - "Variation Type: range - variationValue is %s not a valid option - only \ - scalar value allowed or consider variationType rangefromci" - % valVariation + "Variation Type: range - variationValue is %s not a valid option - only \ + 
scalar value allowed or consider variationType rangefromci" + % valVariation ) log.error(message) raise AssertionError(message) @@ -236,9 +235,9 @@ def updateCfgRange(cfg, cfgProb, varName, varDict): parValue = valVariation + "$" + valSteps else: message = ( - "Variation Type: %s - not a valid option, options are: percent, range, \ - normaldistribution, rangefromci" - % variationType + "Variation Type: %s - not a valid option, options are: percent, range, \ + normaldistribution, rangefromci" + % variationType ) log.error(message) raise AssertionError(message) @@ -272,9 +271,9 @@ def updateCfgRange(cfg, cfgProb, varName, varDict): valValues = np.linspace(float(valStart), float(valStop), int(valSteps)) else: message = ( - "Variation Type: %s - not a valid option, options are: percent, range, \ - normaldistribution, rangefromci" - % variationType + "Variation Type: %s - not a valid option, options are: percent, range, \ + normaldistribution, rangefromci" + % variationType ) log.error(message) raise AssertionError(message) @@ -616,8 +615,8 @@ def makeDictFromVars(cfg): if (len(varParList) == len(varValues) == len(cfg[lengthsPar].split("|")) == len(varTypes)) is False: message = ( - "For every parameter in varParList a variationValue, %s and variationType needs to be provided" - % lengthsPar + "For every parameter in varParList a variationValue, %s and variationType needs to be provided" + % lengthsPar ) log.error(message) raise AssertionError(message) @@ -801,13 +800,14 @@ def createSampleWithVariationStandardParameters(cfgProb, cfgStart, varParList, v "values": sampleWBounds, "typeList": cfgProb["PROBRUN"]["varParType"].split("|"), "thFromIni": "", + "bounds": np.column_stack((lowerBounds, upperBounds)).tolist() } return paramValuesD def createSampleWithVariationForThParameters( - avaDir, cfgProb, cfgStart, varParList, valVariationValue, varType, thReadFromShp + avaDir, cfgProb, cfgStart, varParList, valVariationValue, varType, thReadFromShp ): """Create a sample of 
parameters for a desired parameter variation, and fetch thickness values from shp file and perform variation for each feature within @@ -907,23 +907,23 @@ def createSampleWithVariationForThParameters( # set lower and upper bounds depending on varType (percent, range, rangefromci) lowerBounds[fullVarType == "percent"] = varValList[fullVarType == "percent"] - varValList[ fullVarType == "percent" - ] * (fullValVar[fullVarType == "percent"] / 100.0) + ] * (fullValVar[fullVarType == "percent"] / 100.0) upperBounds[fullVarType == "percent"] = varValList[fullVarType == "percent"] + varValList[ fullVarType == "percent" - ] * (fullValVar[fullVarType == "percent"] / 100.0) + ] * (fullValVar[fullVarType == "percent"] / 100.0) lowerBounds[fullVarType == "range"] = ( - varValList[fullVarType == "range"] - fullValVar[fullVarType == "range"] + varValList[fullVarType == "range"] - fullValVar[fullVarType == "range"] ) upperBounds[fullVarType == "range"] = ( - varValList[fullVarType == "range"] + fullValVar[fullVarType == "range"] + varValList[fullVarType == "range"] + fullValVar[fullVarType == "range"] ) lowerBounds[fullVarType == "rangefromci"] = ( - varValList[fullVarType == "rangefromci"] - ciValues[fullVarType == "rangefromci"] + varValList[fullVarType == "rangefromci"] - ciValues[fullVarType == "rangefromci"] ) upperBounds[fullVarType == "rangefromci"] = ( - varValList[fullVarType == "rangefromci"] + ciValues[fullVarType == "rangefromci"] + varValList[fullVarType == "rangefromci"] + ciValues[fullVarType == "rangefromci"] ) # create a sample of parameter values using scipy latin hypercube or morris sampling @@ -1102,9 +1102,10 @@ def createCfgFiles(paramValuesDList, comMod, cfg, cfgPath=""): cfgStart[section][par] = str(pVal[index]) else: cfgStart["GENERAL"][par] = str(pVal[index]) - if modName.lower() in ["com1dfa", "com5snowslide", "com6rockavalanche"]: + if modName.lower() in ["com1dfa", "com5snowslide", "com6rockavalanche", 'com8motpsa']: 
cfgStart["VISUALISATION"]["scenario"] = str(count1) cfgStart["INPUT"]["thFromIni"] = paramValuesD["thFromIni"] + cfgStart["VISUALISATION"]["sampleMethod"] = cfg['PROBRUN']['sampleMethod'] if "releaseScenario" in paramValuesD.keys(): cfgStart["INPUT"]["releaseScenario"] = paramValuesD["releaseScenario"] cfgF = pathlib.Path(cfgPath, ("%d_%sCfg.ini" % (countS, modName))) From 97fd3805ff2bc808415bffe583342745bc9e3abc Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 26 Feb 2026 11:31:36 +0100 Subject: [PATCH 04/20] fix: handle new _L1/_L2 naming in merging logic and rename some variables --- avaframe/ana6Optimisation/optimisationUtils.py | 12 +++++++++--- avaframe/out3Plot/outAna6Plots.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index f2314bfbb..c7bcec60e 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -190,19 +190,20 @@ def addLossMetrics(df, referenceDF, cfgOpt): denomTversky = TP + alpha * FP + beta * FN df["tversky_score"] = np.where(denomTversky != 0, TP / denomTversky, 0.0) # Subtract 1 to ensure that 0 are good values and 1 bad - tverskyModified = 1 - df["tversky_score"] + df['1-tversky'] = 1 - df["tversky_score"] # Runout # RMSE divided with Runout length of Reference sRunoutRef = referenceDF['reference_sRunout'].values runoutRMSE = df['runoutLineDiff_poly_RMSE'] - runoutRMSENormalised = runoutRMSE / sRunoutRef # 0 is good, 1 is bad + df['runoutRMSENormalised'] = runoutRMSE / sRunoutRef # 0 is good, 1 is bad # Weights wTversky = float(cfgOpt['LOSS_PARAMETERS']['weightTversky']) wRunout = float(cfgOpt['LOSS_PARAMETERS']['weightRunout']) - df['optimisationVariable'] = (wRunout * runoutRMSENormalised.fillna(1) + wTversky * tverskyModified.fillna(1)) + df['optimisationVariable'] = ( + wRunout * df['runoutRMSENormalised'].fillna(1) + wTversky * 
df['1-tversky'].fillna(1)) return df @@ -250,6 +251,11 @@ def buildFinalDF(avalancheDir, varParList, cfgOpt): arealIndicatorDir = pathlib.Path(avalancheDir, 'Outputs', 'out1Peak', 'arealIndicators.pkl') indicatorsDF = readArealIndicators(arealIndicatorDir) + # remove rows ending with '_L1' + indicatorsDF = indicatorsDF.loc[~indicatorsDF["simName"].str.endswith("_L1")].copy() + # remove trailing '_L2' for merging on simName + indicatorsDF["simName"] = (indicatorsDF["simName"].str.replace(r"_L2$", "", regex=True)) + # Merge df's df_merged = pd.merge(paramSetDF, df_aimec, on='simName', how='inner') df_merged = df_merged.merge(indicatorsDF, on="simName", how="left") diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index 82e105d96..02e03024e 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ b/avaframe/out3Plot/outAna6Plots.py @@ -444,8 +444,8 @@ def saveBestorCurrentModelrun(finalDF, paramSelected, ei=None, lcb=None, simName # Subset df, save only important entries cols_keep = ['simName', 'sampleMethods', 'order', 'Simulation time (s)', 'Minimum time step (s)', 'Initial CFL number (-)', 'TP_SimRef_cells', 'TP_SimRef_area', 'FP_SimRef_cells', 'FP_SimRef_area', - 'FN_SimRef_cells', 'FN_SimRef_area', 'recall', 'precision', 'f1_score', 'tversky_score', - 'optimisationVariable'] + 'FN_SimRef_cells', 'FN_SimRef_area', 'recall', 'precision', 'f1_score', 'tversky_score', '1-tversky', + 'runoutRMSENormalised', 'optimisationVariable'] columns_keep = cols_keep[:6] + [p for p in paramSelected if p not in cols_keep] + cols_keep[6:] df = finalDF[columns_keep] From 7c32be04d576cea73fe51572f347b3fd1dd94f82 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Fri, 27 Feb 2026 10:24:57 +0100 Subject: [PATCH 05/20] change function name, move parameter to ini file, change values of ini file, delete redundant code --- avaframe/ana6Optimisation/optimisationUtils.py | 15 ++++++++------- avaframe/ana6Optimisation/runOptimisation.py | 5 +++-- 
avaframe/ana6Optimisation/runOptimisationCfg.ini | 12 ++++++++---- avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py | 13 ++++--------- avaframe/out3Plot/outAna6Plots.py | 2 +- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index c7bcec60e..4f506a4eb 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -344,7 +344,7 @@ def fitSurrogate(df, cfgOpt): return X, y, gp_pipe -def KFoldCV(X, y, pipe, cfgOpt, outDir, avaName, pipeName): +def KFoldCrossValidation(X, y, pipe, cfgOpt, outDir, avaName, pipeName): """ Perform k-fold cross-validation for a regression pipeline. @@ -466,7 +466,7 @@ def optimiseNonSeq(pipe, cfgOpt, paramBounds): return topNStat -def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N=5): +def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N): """ Analyze the top N surrogate candidates. @@ -480,7 +480,7 @@ def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N=5): Predicted uncertainty values. param_cols : list of str Parameter column names. - N : int, optional + N : int Number of best candidates to analyze. Returns @@ -527,7 +527,7 @@ def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N=5): print(f"📊 sigma: {best_sigma:.4f}") return { - f"TopNBest": { + "TopNBest": { "mean_params": mean_params, "std_params": std_params, "mean_mu": mean_mu, @@ -543,7 +543,7 @@ def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N=5): }, topNData -def expectedImprovement(mu, sigma, f_best, xi=0): +def expectedImprovement(mu, sigma, f_best, xi): """ Compute the Expected Improvement (EI) acquisition function for minimization problems. @@ -558,7 +558,7 @@ def expectedImprovement(mu, sigma, f_best, xi=0): Predicted standard deviations of the surrogate model. f_best : float Best observed objective value so far. 
- xi : float, optional + xi : float Exploration parameter controlling exploitation–exploration trade-off. @@ -650,7 +650,8 @@ def EINextPoint(pipe, y, paramBounds, cfgOpt): print(mu.mean(), mu.max(), sigma.mean(), sigma.max()) # EI or LCB for minimization - ei = expectedImprovement(mu, sigma, f_best) + xi = float(cfgOpt['OPTIMISATION']['xi']) + ei = expectedImprovement(mu, sigma, f_best, xi) lcb = lowerConfidenceBound(mu, sigma) xBest = X0[np.argmax(ei)].copy() diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index b0ec5266d..7a88a345f 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -55,7 +55,7 @@ X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF # K fold cross validation - optimisationUtils.KFoldCV(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") + optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") # Fit final pipline gp_pipe.fit(X, y) @@ -108,7 +108,8 @@ X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF # K fold cross validation - optimisationUtils.KFoldCV(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") + optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, + "Gaussian Process Matern Kernel") # Fit final pipline gp_pipe.fit(X, y) diff --git a/avaframe/ana6Optimisation/runOptimisationCfg.ini b/avaframe/ana6Optimisation/runOptimisationCfg.ini index be8a3dbd4..e8b568091 100644 --- a/avaframe/ana6Optimisation/runOptimisationCfg.ini +++ b/avaframe/ana6Optimisation/runOptimisationCfg.ini @@ -18,9 +18,9 @@ resultDir = Outputs/ana6Optimisation [PARAM_BOUNDS] # 2 scenarios: choose 1 or 2 -scenario = 2 +scenario = 1 #(1): morris is run prior, then dataframe of ranked input parameters is already saved by runMorris.py as 
pickle file, user only need to determine how much input paramters to use for optimisation -topN = 6 +topN = 3 # (2): morris is not run prior, input parameters and their bounds are read from a previously saved pickle file generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is determined by the configuration specified in ``probAnaCfg.ini``. @@ -36,7 +36,7 @@ weightRunout = 0.75 [OPTIMISATION] # Type of optimisation: seq or nonseq -optType = seq +optType = nonseq # define the smoothness of the matern kernel of the surrogate, e.g.: 0.5, 1.5, 2.5 matern_nu = 2.5 # define k for k-fold cross validation @@ -51,10 +51,14 @@ n_surrogate_top = 100 n_model_top = 6 # settings for seq: +# xi controls the exploration–exploitation balance in Expected Improvement +# Increasing xi encourages exploration (sampling uncertain regions) +# Decreasing xi favors exploitation (sampling near current optimum) +xi = 0.0 # if ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is reached, optimisation stops eiThreshold = 0.01 numberOfGoodSimulations = 5 -bo_max_iterations = 2 +bo_max_iterations = 20 diff --git a/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py b/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py index 0d482bb7e..7aa83d92b 100644 --- a/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py +++ b/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py @@ -37,9 +37,6 @@ def runProbAna(avalancheDir=''): # log file name; leave empty to use default runLog.log logName = 'runAna4ProbAna' - # Load general configuration filee - cfgMain = cfgUtils.getGeneralConfig() - # Load avalanche directory from general configuration file, # if not provided as input argument cfgMain = cfgUtils.getGeneralConfig() @@ -97,7 +94,6 @@ def runProbAna(avalancheDir=''): if anaPerformed is False: log.warning('No files found for configuration: %s' % probConf) - # make a plot of the contours inputDir = pathlib.Path(avalancheDir, 'Outputs', 'ana4Stats') outName = 
'%s_prob_%s_%s_lim%s' % (str(avalancheDir.stem), @@ -116,16 +112,15 @@ def runProbAna(avalancheDir=''): # plot probability maps sP.plotProbMap(avalancheDir, inputDir, cfgProb, demPlot=True) - # # copy outputs to folder called like probability configurations - # outputFiles = avalancheDir / 'Outputs' / 'ana4Stats' - # saveFiles = avalancheDir / 'Outputs' / ('ana4Stats_' + probConf) - # shutil.move(outputFiles, saveFiles) + # # copy outputs to folder called like probability configurations + # outputFiles = avalancheDir / 'Outputs' / 'ana4Stats' + # saveFiles = avalancheDir / 'Outputs' / ('ana4Stats_' + probConf) + # shutil.move(outputFiles, saveFiles) return if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Run ana4ProbAna workflow') parser.add_argument('avadir', metavar='a', type=str, nargs='?', default='', help='the avalanche directory') diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index 02e03024e..2f1cfa844 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ b/avaframe/out3Plot/outAna6Plots.py @@ -50,7 +50,7 @@ def barplotSA(SiDF, avaName, outDir): # 4) Plot x = np.arange(len(df)) fig, ax = plt.subplots(figsize=(12, 6)) - bars = ax.bar( + ax.bar( x, mu_pct, width=bar_widths, edgecolor="black", capsize=3 if mu_pct_conf is not None else 0 ) From 81b2d68ac7c8e01f7ef9a642fae34310e71ae4d8 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Fri, 27 Feb 2026 11:49:51 +0100 Subject: [PATCH 06/20] Add: if __name__ == '__main__' --- avaframe/ana6Optimisation/runMorrisSA.py | 174 ++++++------- avaframe/ana6Optimisation/runOptimisation.py | 244 ++++++++++--------- 2 files changed, 215 insertions(+), 203 deletions(-) diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py index 13b030fc8..446a8ce45 100644 --- a/avaframe/ana6Optimisation/runMorrisSA.py +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -1,6 +1,3 @@ -""" -Run script for the Morris sensitivity analysis. 
For usage read README_ana6.md. -""" import sys import numpy as np import pandas as pd @@ -14,84 +11,93 @@ import avaframe.out3Plot.outAna6Plots as saveResults import optimisationUtils -# Get module config -module = sys.modules[__name__] -cfgMorrisSA = cfgUtils.getModuleConfig(module, toPrint=False) - -# Load avalanche directory from general configuration file -cfgMain = cfgUtils.getGeneralConfig() -avalancheDir = cfgMain['MAIN']['avalancheDir'] -avaName = pathlib.Path(avalancheDir).name - -# Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak -optimisationUtils.calcArealIndicatorsAndAimec(cfgMorrisSA, avalancheDir, ana3AIMEC) - -# Load variation data with bounds from pickle file -inDir = pathlib.Path(avalancheDir, 'Outputs', "ana4Stats") -paramValuesD = pd.read_pickle(inDir / "paramValuesD.pickle") -varParList = paramValuesD['names'] -# -# Read and merge results from parameter sets (simulation data), areal indicators and AIMEC -finalDF = optimisationUtils.buildFinalDF(avalancheDir, varParList, cfgMorrisSA) - -# Use only morris samples -morrisDF = finalDF[finalDF['sampleMethods'] == 'morris'].copy(deep=True) -# Set order as index -morrisDF.set_index('order', inplace=True) -# Order df based on order (which is the index) -morrisDF.sort_index(inplace=True) - -# Define input for SA -problem = { - 'num_vars': len(varParList), - 'names': varParList, - 'bounds': paramValuesD['bounds'] -} -samples = np.vstack(morrisDF['parameterSet'].values).astype(float) -Y = morrisDF['optimisationVariable'].values - -# Perform SA -Si = morris_analyze.analyze( - problem, - samples, - Y, - conf_level=float(cfgMorrisSA['MORRIS SA']['conf_level']), - num_levels=int(cfgMorrisSA['MORRIS SA']['num_levels']) -) - -# Rank Parameters -SiData = { - "Parameter": Si['names'], - "mu_star": Si['mu_star'], - "sigma": Si['sigma'], - "mu_star_conf": Si['mu_star_conf']} - -# Convert to dataframe -SiDF = pd.DataFrame(SiData) - -# Create folder for saving the 
results of the analysis, if not already existing -resultDir = cfgMorrisSA['GENERAL']['resultDir'] -comModuleName = cfgMorrisSA['GENERAL']['modName'] -outDir = pathlib.Path(avalancheDir, resultDir, comModuleName) -fU.makeADir(outDir) - -saveResults.barplotSA(SiDF, avaName, outDir) -saveResults.scatterplotSA(SiDF, avaName, outDir) -saveResults.scatterplotUncertaintySA(SiDF, avaName, outDir) - -# Sort SA results -SiDFSort = SiDF.sort_values("mu_star", ascending=False).reset_index(drop=True) -# Append bounds to SiDFSort -paramBounds = dict(zip(problem["names"], problem["bounds"])) -SiDFSort["bounds"] = SiDFSort["Parameter"].map(paramBounds) -# Save as Pickle for Optimization -SiDFSort.to_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") - -# Create df with parameters and the loss function for summary statistics -paramLossDF, paramLossScaledDF = optimisationUtils.createDFParameterLoss(morrisDF, SiDFSort['Parameter']) -N = int(cfgMorrisSA['MORRIS SA']['N']) -paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:N] -# Save mean values of best input parameters as csv -date = datetime.now().strftime("%Y%m%d") -csvPath = f"{outDir}/{avaName}_MorrisBEST{N}Simulations_{date}.csv" -paramLossSubsetDF.describe().to_csv(csvPath) + +def runMorrisSA(): + """ + Run script for the Morris sensitivity analysis. For usage read README_ana6.md. 
+ """ + # Get module config + module = sys.modules[__name__] + cfgMorrisSA = cfgUtils.getModuleConfig(module, toPrint=False) + + # Load avalanche directory from general configuration file + cfgMain = cfgUtils.getGeneralConfig() + avalancheDir = cfgMain['MAIN']['avalancheDir'] + avaName = pathlib.Path(avalancheDir).name + + # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak + optimisationUtils.calcArealIndicatorsAndAimec(cfgMorrisSA, avalancheDir, ana3AIMEC) + + # Load variation data with bounds from pickle file + inDir = pathlib.Path(avalancheDir, 'Outputs', "ana4Stats") + paramValuesD = pd.read_pickle(inDir / "paramValuesD.pickle") + varParList = paramValuesD['names'] + # + # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC + finalDF = optimisationUtils.buildFinalDF(avalancheDir, varParList, cfgMorrisSA) + + # Use only morris samples + morrisDF = finalDF[finalDF['sampleMethods'] == 'morris'].copy(deep=True) + # Set order as index + morrisDF.set_index('order', inplace=True) + # Order df based on order (which is the index) + morrisDF.sort_index(inplace=True) + + # Define input for SA + problem = { + 'num_vars': len(varParList), + 'names': varParList, + 'bounds': paramValuesD['bounds'] + } + samples = np.vstack(morrisDF['parameterSet'].values).astype(float) + Y = morrisDF['optimisationVariable'].values + + # Perform SA + Si = morris_analyze.analyze( + problem, + samples, + Y, + conf_level=float(cfgMorrisSA['MORRIS SA']['conf_level']), + num_levels=int(cfgMorrisSA['MORRIS SA']['num_levels']) + ) + + # Rank Parameters + SiData = { + "Parameter": Si['names'], + "mu_star": Si['mu_star'], + "sigma": Si['sigma'], + "mu_star_conf": Si['mu_star_conf']} + + # Convert to dataframe + SiDF = pd.DataFrame(SiData) + + # Create folder for saving the results of the analysis, if not already existing + resultDir = cfgMorrisSA['GENERAL']['resultDir'] + comModuleName = 
cfgMorrisSA['GENERAL']['modName'] + outDir = pathlib.Path(avalancheDir, resultDir, comModuleName) + fU.makeADir(outDir) + + saveResults.barplotSA(SiDF, avaName, outDir) + saveResults.scatterplotSA(SiDF, avaName, outDir) + saveResults.scatterplotUncertaintySA(SiDF, avaName, outDir) + + # Sort SA results + SiDFSort = SiDF.sort_values("mu_star", ascending=False).reset_index(drop=True) + # Append bounds to SiDFSort + paramBounds = dict(zip(problem["names"], problem["bounds"])) + SiDFSort["bounds"] = SiDFSort["Parameter"].map(paramBounds) + # Save as Pickle for Optimization + SiDFSort.to_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") + + # Create df with parameters and the loss function for summary statistics + paramLossDF, paramLossScaledDF = optimisationUtils.createDFParameterLoss(morrisDF, SiDFSort['Parameter']) + N = int(cfgMorrisSA['MORRIS SA']['N']) + paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:N] + # Save mean values of best input parameters as csv + date = datetime.now().strftime("%Y%m%d") + csvPath = f"{outDir}/{avaName}_MorrisBEST{N}Simulations_{date}.csv" + paramLossSubsetDF.describe().to_csv(csvPath) + + +if __name__ == '__main__': + runMorrisSA() diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index 7a88a345f..125f72f3b 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -1,6 +1,3 @@ -""" -Run script for the optimization process. For usage read README_ana6.md. 
-""" import sys import pathlib import pickle @@ -11,69 +8,34 @@ import avaframe.out3Plot.outAna6Plots as saveResults import optimisationUtils -# Get module config -module = sys.modules[__name__] -cfgOpt = cfgUtils.getModuleConfig(module, toPrint=False) - -# Load avalanche directory from general configuration file -cfgMain = cfgUtils.getGeneralConfig() -avalancheDir = cfgMain['MAIN']['avalancheDir'] -avaName = pathlib.Path(avalancheDir).name - -# Create folder for saving the results of the analysis, if not already existing -resultDirOpt = cfgOpt['GENERAL']['resultDir'] -comModuleName = cfgOpt['GENERAL']['modName'] -outDir = pathlib.Path(avalancheDir, resultDirOpt, comModuleName) -fU.makeADir(outDir) - -# Get config from morris for path to morris results -cfgDir = 'runMorrisSA.ini' -cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) -resultDirMorris = cfgMorrisSA['GENERAL']['resultDir'] -inDir = pathlib.Path(avalancheDir, resultDirMorris, comModuleName) - -# Load variation parameters and their bounds -paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, inDir, avalancheDir) - -# Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak -optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) - -# Read and merge results from parameter sets (simulation data), areal indicators and AIMEC -finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) - -# ---------------------------------------------------------------------------------------------------------------------- -optimisationType = cfgOpt['OPTIMISATION']['optType'] -if optimisationType == 'nonseq': - csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_NonSeq.csv" - # Save sim with currently best y - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) - - # Create df with most important parameters and the loss function - emulatorDF, emulatorScaledDF = 
optimisationUtils.createDFParameterLoss(finalDF, paramSelected) - - # Create surrogate - X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF - - # K fold cross validation - optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") - - # Fit final pipline - gp_pipe.fit(X, y) - - # Optimize non-sequential (only use pipe once to find best param) - topNStat = optimisationUtils.optimiseNonSeq(gp_pipe, cfgOpt, paramBounds) - - # Run com8 with mean parameters of best N surrogate evaluations - simNameMean = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['TopNBest']['mean_params'], cfgMain, - optimisationType='nonSeq') - # Run com8 with parameters of best surrogate evaluations - simNameBest = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['Best']['params'], cfgMain, - optimisationType='nonSeq') - # SimName could be None if sim is already available, if so get the name from finalDF - if simNameMean is None: - simNameMean = optimisationUtils.findSimName(finalDF, topNStat["TopNBest"]["mean_params"], atol=1e-6) - if simNameBest is None: - simNameBest = optimisationUtils.findSimName(finalDF, topNStat["Best"]["params"], atol=1e-6) + +def runOptimisation(): + """ + Run script for the optimization process. For usage read README_ana6.md. 
+ """ + # Get module config + module = sys.modules[__name__] + cfgOpt = cfgUtils.getModuleConfig(module, toPrint=False) + + # Load avalanche directory from general configuration file + cfgMain = cfgUtils.getGeneralConfig() + avalancheDir = cfgMain['MAIN']['avalancheDir'] + avaName = pathlib.Path(avalancheDir).name + + # Create folder for saving the results of the analysis, if not already existing + resultDirOpt = cfgOpt['GENERAL']['resultDir'] + comModuleName = cfgOpt['GENERAL']['modName'] + outDir = pathlib.Path(avalancheDir, resultDirOpt, comModuleName) + fU.makeADir(outDir) + + # Get config from morris for path to morris results + cfgDir = 'runMorrisSA.ini' + cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) + resultDirMorris = cfgMorrisSA['GENERAL']['resultDir'] + inDir = pathlib.Path(avalancheDir, resultDirMorris, comModuleName) + + # Load variation parameters and their bounds + paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, inDir, avalancheDir) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) @@ -81,44 +43,39 @@ # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) - # Create image of table - saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, topNStat, - out_path=outDir / f"{avaName}_StatisticsBestParameterValues_NonSeqAnalysis.png", - title=f"{avaName}, NonSeq-Analysis: Best Surrogate vs Best Model Run", - simNameMean=simNameMean, simNameBest=simNameBest) - - # Save latest real sim - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, simName=simNameMean, csv_path=csv_path) - -# ---------------------------------------------------------------------------------------------------------------------- -elif optimisationType == 'seq': - csv_path = 
outDir / f"{avaName}_BestOrCurrentSimulation_BO.csv" - # Save sim with currently best y - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) - - eiThreshold = float(cfgOpt['OPTIMISATION']['eiThreshold']) - nGoodSims = float(cfgOpt['OPTIMISATION']['numberOfGoodSimulations']) - countGoodSims = 0 - bo_max_iterations = int(cfgOpt['OPTIMISATION']['bo_max_iterations']) + # ---------------------------------------------------------------------------------------------------------------------- + optimisationType = cfgOpt['OPTIMISATION']['optType'] + if optimisationType == 'nonseq': + csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_NonSeq.csv" + # Save sim with currently best y + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) - for i in range(bo_max_iterations): # Create df with most important parameters and the loss function emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) - # Train surrogate + + # Create surrogate X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF # K fold cross validation - optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, - "Gaussian Process Matern Kernel") + optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, "Gaussian Process Matern Kernel") # Fit final pipline gp_pipe.fit(X, y) - # Get next input parameters with EI - xBest, xBestDict, ei, lcb = optimisationUtils.EINextPoint(gp_pipe, y, paramBounds, cfgOpt) - - # Run com8 with best x - simName = optimisationUtils.runCom8MoTPSA(avalancheDir, xBestDict, cfgMain, i, optimisationType='seq') + # Optimize non-sequential (only use pipe once to find best param) + topNStat = optimisationUtils.optimiseNonSeq(gp_pipe, cfgOpt, paramBounds) + + # Run com8 with mean parameters of best N surrogate evaluations + simNameMean = optimisationUtils.runCom8MoTPSA(avalancheDir, 
topNStat['TopNBest']['mean_params'], cfgMain, + optimisationType='nonSeq') + # Run com8 with parameters of best surrogate evaluations + simNameBest = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['Best']['params'], cfgMain, + optimisationType='nonSeq') + # SimName could be None if sim is already available, if so get the name from finalDF + if simNameMean is None: + simNameMean = optimisationUtils.findSimName(finalDF, topNStat["TopNBest"]["mean_params"], atol=1e-6) + if simNameBest is None: + simNameBest = optimisationUtils.findSimName(finalDF, topNStat["Best"]["params"], atol=1e-6) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) @@ -126,28 +83,77 @@ # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) - # Save latest sim - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, ei, lcb, simName, - csv_path=csv_path) - # If ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is - # reached, optimization stops - if ei < eiThreshold: - countGoodSims = countGoodSims + 1 - if countGoodSims >= nGoodSims: - break - - saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, - out_path=outDir / f"{avaName}_StatisticsBestParameterValues_BOAnalysis.png", - title=f"{avaName}, Seq-Analysis: Best Model Runs") - - # Save BO plots - n_top_samples = int(cfgOpt['OPTIMISATION']['n_model_top']) - emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) - - saveResults.BOConvergencePlot(finalDF, avaName, outDir) - saveResults.BOBoxplot(emulatorDF, avaName, outDir, N=n_top_samples) - saveResults.BOBoxplotNormalised(emulatorDF, paramBounds, avaName, outDir, N=n_top_samples) - -# Save pickle file of finalDF -with open(outDir / 
f"{avaName}_finalDF.pickle", "wb") as fi: - pickle.dump(finalDF, fi) + # Create image of table + saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, topNStat, + out_path=outDir / f"{avaName}_StatisticsBestParameterValues_NonSeqAnalysis.png", + title=f"{avaName}, NonSeq-Analysis: Best Surrogate vs Best Model Run", + simNameMean=simNameMean, simNameBest=simNameBest) + + # Save latest real sim + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, simName=simNameMean, csv_path=csv_path) + + # ---------------------------------------------------------------------------------------------------------------------- + elif optimisationType == 'seq': + csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_BO.csv" + # Save sim with currently best y + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) + + eiThreshold = float(cfgOpt['OPTIMISATION']['eiThreshold']) + nGoodSims = float(cfgOpt['OPTIMISATION']['numberOfGoodSimulations']) + countGoodSims = 0 + bo_max_iterations = int(cfgOpt['OPTIMISATION']['bo_max_iterations']) + + for i in range(bo_max_iterations): + # Create df with most important parameters and the loss function + emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) + # Train surrogate + X, y, gp_pipe = optimisationUtils.fitSurrogate(emulatorDF, cfgOpt) # X,y are features of emulatorDF + + # K fold cross validation + optimisationUtils.KFoldCrossValidation(X, y, gp_pipe, cfgOpt, outDir, avaName, + "Gaussian Process Matern Kernel") + + # Fit final pipline + gp_pipe.fit(X, y) + + # Get next input parameters with EI + xBest, xBestDict, ei, lcb = optimisationUtils.EINextPoint(gp_pipe, y, paramBounds, cfgOpt) + + # Run com8 with best x + simName = optimisationUtils.runCom8MoTPSA(avalancheDir, xBestDict, cfgMain, i, optimisationType='seq') + + # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak + 
optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + + # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC + finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + + # Save latest sim + saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, ei, lcb, simName, + csv_path=csv_path) + # If ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is + # reached, optimization stops + if ei < eiThreshold: + countGoodSims = countGoodSims + 1 + if countGoodSims >= nGoodSims: + break + + saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, + out_path=outDir / f"{avaName}_StatisticsBestParameterValues_BOAnalysis.png", + title=f"{avaName}, Seq-Analysis: Best Model Runs") + + # Save BO plots + n_top_samples = int(cfgOpt['OPTIMISATION']['n_model_top']) + emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) + + saveResults.BOConvergencePlot(finalDF, avaName, outDir) + saveResults.BOBoxplot(emulatorDF, avaName, outDir, N=n_top_samples) + saveResults.BOBoxplotNormalised(emulatorDF, paramBounds, avaName, outDir, N=n_top_samples) + + # Save pickle file of finalDF + with open(outDir / f"{avaName}_finalDF.pickle", "wb") as fi: + pickle.dump(finalDF, fi) + + +if __name__ == '__main__': + runOptimisation() From 76b0dc86d6df7ff5ec55df25f64f7bc0bf04236c Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Fri, 27 Feb 2026 13:23:11 +0100 Subject: [PATCH 07/20] Fix: remove duplicate in com8MoTPSAMain and read chunkSize from avaframeCfg.ini nCPU --- avaframe/com8MoTPSA/com8MoTPSA.py | 76 ++++++++++++------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/avaframe/com8MoTPSA/com8MoTPSA.py b/avaframe/com8MoTPSA/com8MoTPSA.py index 679636ca5..687efb06a 100644 --- a/avaframe/com8MoTPSA/com8MoTPSA.py +++ b/avaframe/com8MoTPSA/com8MoTPSA.py @@ -51,58 +51,42 @@ def 
com8MoTPSAMain(cfgMain, cfgInfo=None, returnSimName=None): # Preprocess the simulations, mainly creating the rcf files rcfFiles = com8MoTPSAPreprocess(simDict, inputSimFiles, cfgMain) + # Check if there is simulation to be run + if not rcfFiles: + log.warning("There is no simulation to be performed for releaseScenario") + return None + # And now we run the simulations startTime = time.time() log.info("--- STARTING (potential) PARALLEL PART ----") # Split into chunks to postprocess and clean up work dirs incrementally - chunkSize = 8 - if len(rcfFiles) > chunkSize: - for i in range(0, len(rcfFiles), chunkSize): - rcfFilesChunk = rcfFiles[i:i + chunkSize] - simNamesChunk = [p.stem for p in rcfFilesChunk] - - nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFilesChunk)) - - if bool(simNamesChunk): - with Pool(processes=nCPU) as pool: - results = pool.map(com8MoTPSATask, rcfFilesChunk) - pool.close() - pool.join() - - timeNeeded = "%.2f" % (time.time() - startTime) - log.info("Overall (parallel) com8MoTPSA computation took: %s s " % timeNeeded) - log.info("--- ENDING (potential) PARALLEL PART ----") - - # Postprocess the simulations - com8MoTPSAPostprocess(simNamesChunk, cfgMain, inputSimFiles) - - # Delete folder in Work directory after postprocessing to reduce memory costs - avaDir = cfgMain["MAIN"]["avalancheDir"] - for sim in simNamesChunk: - folderName = "Work/com8MoTPSA/" + sim - _checkForFolderAndDelete(avaDir, folderName) - else: - log.warning("There is no simulation to be performed for releaseScenario") - else: - nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFiles)) - - simNames = [p.stem for p in rcfFiles] - if bool(simNames): - with Pool(processes=nCPU) as pool: - results = pool.map(com8MoTPSATask, rcfFiles) - pool.close() - pool.join() - - timeNeeded = "%.2f" % (time.time() - startTime) - log.info("Overall (parallel) com8MoTPSA computation took: %s s " % timeNeeded) - log.info("--- ENDING (potential) PARALLEL PART ----") - - # Postprocess the 
simulations - com8MoTPSAPostprocess(simNames, cfgMain, inputSimFiles) - else: - log.warning("There is no simulation to be performed for releaseScenario") + chunkSize = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFiles)) + rcfChunks = [rcfFiles[i:i + chunkSize] for i in range(0, len(rcfFiles), chunkSize)] + + for rcfFilesChunk in rcfChunks: + simNamesChunk = [p.stem for p in rcfFilesChunk] + + nCPU = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFilesChunk)) + + with Pool(processes=nCPU) as pool: + results = pool.map(com8MoTPSATask, rcfFilesChunk) + pool.close() + pool.join() + + timeNeeded = "%.2f" % (time.time() - startTime) + log.info("Overall (parallel) com8MoTPSA computation took: %s s ", timeNeeded) + log.info("--- ENDING (potential) PARALLEL PART ----") + + # Postprocess the simulations + com8MoTPSAPostprocess(simNamesChunk, cfgMain, inputSimFiles) + + # Delete folder in Work directory after postprocessing to reduce memory costs + avaDir = cfgMain["MAIN"]["avalancheDir"] + for sim in simNamesChunk: + folderName = "Work/com8MoTPSA/" + sim + _checkForFolderAndDelete(avaDir, folderName) if returnSimName is not None and simDict: return next(iter(simDict)) From 3d831a70764d22ce916161df621527fd77ce8e95 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Fri, 27 Feb 2026 19:43:12 +0100 Subject: [PATCH 08/20] Implement a direct call of com8MoTPSAMain with updated cfgs (writeCfgFiles in optimisationUtils.py), add possibility to RUN writeCfgFiles with counter --- .../ana6Optimisation/optimisationUtils.py | 123 ++++++++++-------- avaframe/ana6Optimisation/runOptimisation.py | 41 ++++-- 2 files changed, 94 insertions(+), 70 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index 4f506a4eb..356e7229f 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -15,7 +15,8 @@ from sklearn.gaussian_process import GaussianProcessRegressor from 
def writeCfgFiles(avalancheDir, paramSets, optimisationType, comModuleName, counter=None):
    """
    Generate and write configuration files for a given computation module
    based on optimisation results.

    One configuration file is written per parameter set in ``paramSets``;
    a single dict is treated as a list with one element.

    Parameters
    ----------
    avalancheDir : str or pathlib.Path
        Path to the avalanche directory.
    paramSets : dict or list of dict
        Parameter set(s) to write to config files.
    optimisationType : str
        Optimisation mode ('nonseq' or 'seq'), stored in the config file
        for traceability. Compared case-insensitively.
    comModuleName : str
        Name of the computation module (e.g. "com8MoTPSA").
        Used for naming the configuration directory and files.
    counter : int or None, optional
        If provided, use this value as starting index for scenario/file naming.
        Useful when calling this function inside an outer optimisation loop.

    Returns
    -------
    cfgFiles : list of pathlib.Path
        Paths to the written configuration files.
    cfgPath : pathlib.Path
        Directory where configuration files were stored.
    """
    # Allow single dict as input
    if isinstance(paramSets, dict):
        paramSets = [paramSets]

    cfgFiles = []
    avaDir = pathlib.Path(avalancheDir)
    # Directory where generated configuration files will be saved
    cfgPath = avaDir / "Work" / f"{comModuleName}ConfigFiles"
    fU.makeADir(cfgPath)

    # If counter is given, start indexing from there; otherwise start at 0
    start = int(counter) if counter is not None else 0

    for i, xBestDict in enumerate(paramSets):
        idx = start + i  # index used for scenario + filename

        # Load a fresh module configuration template for every parameter set
        cfgCom8MoTPSA = cfgUtils.getModuleConfig(com8MoTPSA, toPrint=False)

        # Overwrite parameters in their corresponding sections
        for param, val in xBestDict.items():
            section = probAna.fetchParameterSection(cfgCom8MoTPSA, param)
            cfgCom8MoTPSA[section][param] = str(val)

        # Assign unique scenario ID and optimisation type for later identification
        cfgCom8MoTPSA['VISUALISATION']['scenario'] = str(idx)
        # BUGFIX: callers pass cfgOpt['OPTIMISATION']['optType'], which is
        # 'nonseq' (all lower case), so the original comparison against the
        # camel-cased 'nonSeq' never matched and every run was tagged
        # 'EI/LCB'. Compare case-insensitively to be robust to either spelling.
        if optimisationType.lower() == 'nonseq':
            cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'nonSeq'
        else:
            cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'EI/LCB'

        # Write configuration file to disk
        cfgF = cfgPath / f"{idx}_{comModuleName}Cfg.ini"
        with open(cfgF, "w") as configfile:
            cfgCom8MoTPSA.write(configfile)

        cfgFiles.append(cfgF)

    return cfgFiles, cfgPath
com8MoTPSA import optimisationUtils @@ -64,18 +67,19 @@ def runOptimisation(): # Optimize non-sequential (only use pipe once to find best param) topNStat = optimisationUtils.optimiseNonSeq(gp_pipe, cfgOpt, paramBounds) + paramSets = [ + topNStat["TopNBest"]["mean_params"], + topNStat["Best"]["params"], + ] + # Generate and write config files with mean parameter values of best N surrogate evaluations and with best + # parameter values of surrogate + # Clean input directory(ies) of old work files + initProj.cleanSingleAvaDir(avalancheDir, deleteOutput=False) - # Run com8 with mean parameters of best N surrogate evaluations - simNameMean = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['TopNBest']['mean_params'], cfgMain, - optimisationType='nonSeq') - # Run com8 with parameters of best surrogate evaluations - simNameBest = optimisationUtils.runCom8MoTPSA(avalancheDir, topNStat['Best']['params'], cfgMain, - optimisationType='nonSeq') - # SimName could be None if sim is already available, if so get the name from finalDF - if simNameMean is None: - simNameMean = optimisationUtils.findSimName(finalDF, topNStat["TopNBest"]["mean_params"], atol=1e-6) - if simNameBest is None: - simNameBest = optimisationUtils.findSimName(finalDF, topNStat["Best"]["params"], atol=1e-6) + cfgFiles, cfgPath = optimisationUtils.writeCfgFiles(avalancheDir, paramSets, optimisationType, comModuleName) + + # Perform com8MoTPSA simulations + com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgPath) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) @@ -83,6 +87,9 @@ def runOptimisation(): # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + simNameMean = optimisationUtils.findSimName(finalDF, topNStat["TopNBest"]["mean_params"], atol=1e-6) + simNameBest 
= optimisationUtils.findSimName(finalDF, topNStat["Best"]["params"], atol=1e-6) + # Create image of table saveResults.saveTopCandidates(finalDF, paramSelected, cfgOpt, topNStat, out_path=outDir / f"{avaName}_StatisticsBestParameterValues_NonSeqAnalysis.png", @@ -119,8 +126,15 @@ def runOptimisation(): # Get next input parameters with EI xBest, xBestDict, ei, lcb = optimisationUtils.EINextPoint(gp_pipe, y, paramBounds, cfgOpt) - # Run com8 with best x - simName = optimisationUtils.runCom8MoTPSA(avalancheDir, xBestDict, cfgMain, i, optimisationType='seq') + # Clean input directory(ies) of old work files + initProj.cleanSingleAvaDir(avalancheDir, deleteOutput=False) + + # Generate and write config file + cfgFiles, cfgPath = optimisationUtils.writeCfgFiles(avalancheDir, xBestDict, optimisationType, + comModuleName, counter=i) + + # Perform com8MoTPSA simulation + com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgPath) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) @@ -129,6 +143,7 @@ def runOptimisation(): finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) # Save latest sim + simName = optimisationUtils.findSimName(finalDF, xBestDict, atol=1e-6) saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, ei, lcb, simName, csv_path=csv_path) # If ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is From 8843c22291766c2049e96e1119f8eadde5b5a283 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Fri, 27 Feb 2026 20:56:33 +0100 Subject: [PATCH 09/20] Use logging instead of print in optimisationUtils.py --- .../ana6Optimisation/optimisationUtils.py | 37 +++++++++++++------ avaframe/ana6Optimisation/runOptimisation.py | 8 +++- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py 
b/avaframe/ana6Optimisation/optimisationUtils.py index 356e7229f..ccbb026c8 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -1,6 +1,7 @@ import numpy as np import pathlib import pickle +import logging import pandas as pd import configparser import os @@ -22,6 +23,10 @@ from avaframe.runScripts.runPlotAreaRefDiffs import runPlotAreaRefDiffs import avaframe.out3Plot.outAna6Plots as saveResults +# create local logger +# log = logging.getLogger(__name__) +log = logging.getLogger("avaframe") + def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC): """ @@ -407,12 +412,13 @@ def KFoldCrossValidation(X, y, pipe, cfgOpt, outDir, avaName, pipeName): row[f"{split} {m} mean"] = arr.mean() row[f"{split} {m} std"] = arr.std() - print(f"\n{pipeName}, {k}-fold CV:") + # Log results of cross validation + log.info(f"\n{pipeName}, {k}-fold CV:") for split in ("test", "train"): - print(f" {split.capitalize()} metrics:") + log.info(f" {split.capitalize()} metrics:") for m in ("rmse", "mae", "r2"): arr = scores[f"{split}_{m}"] - print(f" {m.upper():<4}: {arr.mean():.4g} ± {arr.std():.4g}") + log.info(f" {m.upper():<4}: {arr.mean():.4g} ± {arr.std():.4g}") df = pd.DataFrame([row]) # Include date, format: YYYYMMDD @@ -505,15 +511,18 @@ def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N): mean_sigma = topNData["sigma"].mean() std_sigma = topNData["sigma"].std() - print(f"\n🔍 Mittelwerte ± Std (Top {N}):") + log.info(f"Surrogate Top {N} candidates: mean ± std") + for p in param_cols: m, s = mean_params[p], std_params[p] perc = (s / m * 100) if m != 0 else np.nan - print(f" {p:30s}: {m:.6f} ± {s:.6f} ({perc:.1f}%)") + log.info(f" {p:30s}: {m:.6f} ± {s:.6f} ({perc:.1f}%%)") + perc_mu = (std_mu / mean_mu * 100) if mean_mu != 0 else np.nan perc_sigma = (std_sigma / mean_sigma * 100) if mean_sigma != 0 else np.nan - print(f"📉 mu: {mean_mu:.4f} ± {std_mu:.4f} ({perc_mu:.1f}%)") - print(f"📊 sigma: 
{mean_sigma:.4f} ± {std_sigma:.4f} ({perc_sigma:.1f}%)") + + log.info(f"mu: {mean_mu:.4f} ± {std_mu:.4f} ({perc_mu:.1f}%%)") + log.info(f"sigma: {mean_sigma:.4f} ± {std_sigma:.4f} ({perc_sigma:.1f}%%)") # Best single point idx_best = np.argmin(mu) @@ -521,11 +530,13 @@ def analyzeTopCandidates(df_candidates, mu, sigma, param_cols, N): best_loss = mu[idx_best] best_sigma = sigma[idx_best] - print("\n🔍 Best single Parameter combination:") + log.info("Best single parameter combination from Surrogate:") + for p in param_cols: - print(f" {p:30s}: {best_params[p]:.4f}") - print(f"📉 mu: {best_loss:.4f}") - print(f"📊 sigma: {best_sigma:.4f}") + log.info(f" {p:30s}: {best_params[p]:.4f}") + + log.info(f"mu: {best_loss:.4f}") + log.info(f"sigma: {best_sigma:.4f}") return { "TopNBest": { @@ -648,7 +659,9 @@ def EINextPoint(pipe, y, paramBounds, cfgOpt): # Predict with pipe mu, sigma = pipe.predict(X0, return_std=True) - print(mu.mean(), mu.max(), sigma.mean(), sigma.max()) + log.info("Prediction statistics:") + log.info(" mu mean=%.6f max=%.6f", mu.mean(), mu.max()) + log.info(" sigma mean=%.6f max=%.6f", sigma.mean(), sigma.max()) # EI or LCB for minimization xi = float(cfgOpt['OPTIMISATION']['xi']) diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index df7d03b13..f0d432331 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -2,7 +2,7 @@ import pathlib import pickle -from avaframe.in3Utils import cfgUtils +from avaframe.in3Utils import cfgUtils, logUtils from avaframe.in3Utils import fileHandlerUtils as fU from avaframe.in3Utils import initializeProject as initProj from avaframe.ana3AIMEC import ana3AIMEC @@ -37,6 +37,12 @@ def runOptimisation(): resultDirMorris = cfgMorrisSA['GENERAL']['resultDir'] inDir = pathlib.Path(avalancheDir, resultDirMorris, comModuleName) + # Start logging + logName = 'runOptimisation' + log = logUtils.initiateLogger(avalancheDir, 
logName) + log.info('MAIN SCRIPT') + log.info('Current avalanche: %s', avalancheDir) + # Load variation parameters and their bounds paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, inDir, avalancheDir) From 51ac8a19e131941a6524eae00904f33935c0fd05 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Sat, 28 Feb 2026 14:12:43 +0100 Subject: [PATCH 10/20] Add sklearn (scikit-learn) to the requirements in pyproject.toml and clean header Implement modName to make code general and remove adjustText package --- .../ana6Optimisation/optimisationUtils.py | 2 - avaframe/ana6Optimisation/runOptimisation.py | 1 - .../com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py | 2 - avaframe/in3Utils/cfgUtils.py | 44 +++++++------------ avaframe/out3Plot/outAna6Plots.py | 17 +++++-- pyproject.toml | 1 + 6 files changed, 29 insertions(+), 38 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index ccbb026c8..76ee856e7 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -5,7 +5,6 @@ import pandas as pd import configparser import os -import time from scipy.stats import norm, qmc from datetime import datetime import re @@ -24,7 +23,6 @@ import avaframe.out3Plot.outAna6Plots as saveResults # create local logger -# log = logging.getLogger(__name__) log = logging.getLogger("avaframe") diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index f0d432331..463ca392f 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -7,7 +7,6 @@ from avaframe.in3Utils import initializeProject as initProj from avaframe.ana3AIMEC import ana3AIMEC import avaframe.out3Plot.outAna6Plots as saveResults -from avaframe.ana4Stats import probAna from avaframe.com8MoTPSA import com8MoTPSA import optimisationUtils diff --git 
a/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py b/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py index 7aa83d92b..3d3da6a3c 100644 --- a/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py +++ b/avaframe/com8MoTPSA/runAna4ProbAnaCom8MoTPSA.py @@ -9,7 +9,6 @@ import argparse # Local imports -from avaframe.com1DFA import com1DFA import avaframe.com8MoTPSA.com8MoTPSA as com8MoTPSA from avaframe.out3Plot import statsPlots as sP from avaframe.ana4Stats import probAna @@ -18,7 +17,6 @@ from avaframe.in3Utils import logUtils import avaframe.in3Utils.fileHandlerUtils as fU from avaframe.out3Plot import outQuickPlot as oP -import configparser def runProbAna(avalancheDir=''): diff --git a/avaframe/in3Utils/cfgUtils.py b/avaframe/in3Utils/cfgUtils.py index 0f4ab5aac..f6d3527e3 100644 --- a/avaframe/in3Utils/cfgUtils.py +++ b/avaframe/in3Utils/cfgUtils.py @@ -55,7 +55,6 @@ from avaframe.in3Utils import logUtils from avaframe.in3Utils import fileHandlerUtils as fU - log = logging.getLogger(__name__) @@ -96,7 +95,7 @@ def getGeneralConfig(nameFile=""): def getModuleConfig( - module, avalancheDir="", fileOverride="", batchCfgDir="", modInfo=False, toPrint=True, onlyDefault=False + module, avalancheDir="", fileOverride="", batchCfgDir="", modInfo=False, toPrint=True, onlyDefault=False ): """Returns the configuration for a given module returns a configParser object OR pathlib.Path (when batchCfgDir is used) @@ -721,12 +720,12 @@ def writeDictToJson(inDict, outFilePath): def createConfigurationInfo( - avaDir, - comModule="com1DFA", - standardCfg="", - writeCSV=False, - specDir="", - simNameList=[], + avaDir, + comModule="com1DFA", + standardCfg="", + writeCSV=False, + specDir="", + simNameList=[], ): """Read configurations from all simulations configuration ini files from directory @@ -991,10 +990,7 @@ def readConfigurationInfoFromDone(avaDir, specDir="", latest=False, modName=''): if specDir != "": inDir = pathlib.Path(specDir, "configurationFiles") else: - if modName == 
'com8MoTPSA': - inDir = pathlib.Path(avaDir, "Outputs", "com8MoTPSA", "configurationFiles") - else: - inDir = pathlib.Path(avaDir, "Outputs", "com1DFA", "configurationFiles") + inDir = pathlib.Path(avaDir, "Outputs", modName, "configurationFiles") # search inDir/configurationFilesDone or inDir/configurationFilesLatest (depending on latest flag) for already existing sims if latest: @@ -1012,23 +1008,13 @@ def readConfigurationInfoFromDone(avaDir, specDir="", latest=False, modName=''): simDF = None else: # create simDF (dataFrame with one row per simulation of configuration files found in configDir) - if modName == 'com8MoTPSA': - simDF = createConfigurationInfo( - avaDir, - comModule="com8MoTPSA", - standardCfg="", - writeCSV=False, - specDir=specDir, - simNameList=simNameExisting, - ) - else: - simDF = createConfigurationInfo( - avaDir, - comModule="com1DFA", - standardCfg="", - writeCSV=False, - specDir=specDir, - simNameList=simNameExisting, + simDF = createConfigurationInfo( + avaDir, + comModule=modName, + standardCfg="", + writeCSV=False, + specDir=specDir, + simNameList=simNameExisting, ) # check for allConfigurationsInfo to find computation info and add to info fetched from ini files diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index 2f1cfa844..a4922c516 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ b/avaframe/out3Plot/outAna6Plots.py @@ -2,7 +2,6 @@ import pandas as pd import pathlib import matplotlib.pyplot as plt -from adjustText import adjust_text from datetime import datetime @@ -134,9 +133,19 @@ def scatterplotUncertaintySA(SiDF, avaName, outDir): fmt='o', color='blue', ecolor='gray', elinewidth=1.5, capsize=4 ) - # Annotations with adjustText - texts = [plt.text(SiDF['mu_star'][i], SiDF['sigma'][i], SiDF['Parameter'][i], fontsize=9) for i in range(len(SiDF))] - adjust_text(texts, arrowprops=dict(arrowstyle='->', color='gray', lw=0.5)) + x_range = SiDF['mu_star'].max() - SiDF['mu_star'].min() + y_range = 
SiDF['sigma'].max() - SiDF['sigma'].min() + + x_offset = 0.01 * x_range + y_offset = 0.01 * y_range + + for i in range(len(SiDF)): + plt.text( + SiDF['mu_star'][i] + x_offset, + SiDF['sigma'][i] + y_offset, + SiDF['Parameter'][i], + fontsize=9 + ) # Axes and layout plt.xlabel('mu_star (Einflussstärke)', fontsize=12) diff --git a/pyproject.toml b/pyproject.toml index afe62dd2b..cdcde8914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ "contextily", "geopandas", "fiona", + "scikit-learn", ] # Setuptools From 0f4e968d9d6b0dd6298971a2c470b7d86de646b7 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Sat, 28 Feb 2026 15:32:12 +0100 Subject: [PATCH 11/20] Initialize index and sampleMethod with np.nan to avoid UnboundLocalError and stale values and remove copy paste error. --- avaframe/ana6Optimisation/optimisationUtils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index 76ee856e7..c7b0c32d5 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -88,17 +88,17 @@ def readParamSetDF(inDir, varParList): config = configparser.ConfigParser() config.read(filepath) + # Set defaults per file + index = np.nan + sampleMethod = np.nan + if 'VISUALISATION' in config.sections(): # config is inifile index = config['VISUALISATION']['scenario'] - - if 'VISUALISATION' in config.sections(): - # config is inifile - index = config['VISUALISATION']['scenario'] - if 'sampleMethod' in config['VISUALISATION']: - sampleMethod = config['VISUALISATION']['sampleMethod'] - else: - sampleMethod = np.nan + if 'sampleMethod' in config['VISUALISATION']: + sampleMethod = config['VISUALISATION']['sampleMethod'] + else: + sampleMethod = np.nan row = [] # row contains 1 row for param in varParList: From 4d36ca4b6c9200c5633261e0ca6f8c57aefac836 Mon Sep 17 00:00:00 2001 From: 
RolandFischbacher Date: Sat, 28 Feb 2026 16:03:12 +0100 Subject: [PATCH 12/20] Add a check if 'VISUALISATION' exists in cfgStart, if not, it will be added to cfgStart. And read sample method with fallback; meaning if 'PROBANA' or 'sampleMethod' is missing sample_method contains an empty string --- avaframe/ana4Stats/probAna.py | 10 +++++++++- avaframe/ana6Optimisation/optimisationUtils.py | 4 ++-- avaframe/ana6Optimisation/runMorrisSA.py | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/avaframe/ana4Stats/probAna.py b/avaframe/ana4Stats/probAna.py index 2108fb7d3..e4344ccdf 100644 --- a/avaframe/ana4Stats/probAna.py +++ b/avaframe/ana4Stats/probAna.py @@ -1103,9 +1103,17 @@ def createCfgFiles(paramValuesDList, comMod, cfg, cfgPath=""): else: cfgStart["GENERAL"][par] = str(pVal[index]) if modName.lower() in ["com1dfa", "com5snowslide", "com6rockavalanche", 'com8motpsa']: + # Check if visualisation exists in cfgStart, if not add the section + if not cfgStart.has_section("VISUALISATION"): + cfgStart.add_section("VISUALISATION") + cfgStart["VISUALISATION"]["scenario"] = str(count1) cfgStart["INPUT"]["thFromIni"] = paramValuesD["thFromIni"] - cfgStart["VISUALISATION"]["sampleMethod"] = cfg['PROBRUN']['sampleMethod'] + + # Safe read with fallback (no KeyError if PROBRUN or sampleMethod is missing) + sample_method = cfg.get("PROBRUN", "sampleMethod", fallback="") + cfgStart["VISUALISATION"]["sampleMethod"] = sample_method + if "releaseScenario" in paramValuesD.keys(): cfgStart["INPUT"]["releaseScenario"] = paramValuesD["releaseScenario"] cfgF = pathlib.Path(cfgPath, ("%d_%sCfg.ini" % (countS, modName))) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index c7b0c32d5..8de056415 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -91,7 +91,7 @@ def readParamSetDF(inDir, varParList): # Set defaults per file index = np.nan 
sampleMethod = np.nan - + if 'VISUALISATION' in config.sections(): # config is inifile index = config['VISUALISATION']['scenario'] @@ -730,7 +730,7 @@ def writeCfgFiles(avalancheDir, paramSets, optimisationType, comModuleName, coun # Assign unique scenario ID and optimisation type for later identification cfgCom8MoTPSA['VISUALISATION']['scenario'] = str(idx) - if optimisationType == 'nonSeq': + if optimisationType == 'nonseq': cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'nonSeq' else: cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'EI/LCB' diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py index 446a8ce45..6c6892366 100644 --- a/avaframe/ana6Optimisation/runMorrisSA.py +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -32,7 +32,7 @@ def runMorrisSA(): inDir = pathlib.Path(avalancheDir, 'Outputs', "ana4Stats") paramValuesD = pd.read_pickle(inDir / "paramValuesD.pickle") varParList = paramValuesD['names'] - # + # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, varParList, cfgMorrisSA) From 0e7b4460424d8ffb61237b049b9d817afa5eff26 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Mon, 2 Mar 2026 10:43:38 +0100 Subject: [PATCH 13/20] Make optimisationType case-insensitive, add raise ValueErrors, update docstring, tidy up code, add if __name__ == '__main__': in runPlotMorrisConvergence.py and improve BOConvergencePlot.
--- avaframe/ana6Optimisation/README_ana6.md | 2 +- .../ana6Optimisation/optimisationUtils.py | 46 +++--- avaframe/ana6Optimisation/runMorrisSA.py | 15 ++ avaframe/ana6Optimisation/runOptimisation.py | 24 ++- .../runPlotMorrisConvergence.py | 55 ++++--- avaframe/com8MoTPSA/com8MoTPSA.py | 1 + avaframe/out3Plot/outAna6Plots.py | 140 +++++++++++------- 7 files changed, 184 insertions(+), 99 deletions(-) diff --git a/avaframe/ana6Optimisation/README_ana6.md b/avaframe/ana6Optimisation/README_ana6.md index bca9db16c..473e19b1e 100644 --- a/avaframe/ana6Optimisation/README_ana6.md +++ b/avaframe/ana6Optimisation/README_ana6.md @@ -85,7 +85,7 @@ The convergence analysis evaluates how the Morris sensitivity indices stabilise **Requirements:** - Run `runAna4ProbAnaCom8MoTPSA` multiple times with different numbers of Morris trajectories -- Rename Output folders afterwards with the following naming convention: OutputsR +- Rename Output folders afterwards with the following naming convention: OutputsR`` where `` corresponds to the number of trajectories diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index 8de056415..4803fbc37 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -15,7 +15,7 @@ from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel -from avaframe.in3Utils import cfgUtils, logUtils +from avaframe.in3Utils import cfgUtils from avaframe.in3Utils import fileHandlerUtils as fU from avaframe.com8MoTPSA import com8MoTPSA from avaframe.ana4Stats import probAna @@ -143,7 +143,7 @@ def readArealIndicators(inDir): return indicatorsDF -def addLossMetrics(df, referenceDF, cfgOpt): +def addLossMetrics(df, referenceDF, cfg): """ Compute evaluation metrics (recall, precision, F1, Tversky score) and a combined weighted Loss (or optimisation) variable from a given 
DataFrame. @@ -158,8 +158,8 @@ def addLossMetrics(df, referenceDF, cfgOpt): ``TP_SimRef_area``, ``FP_SimRef_area``, ``FN_SimRef_area``. referenceDF: pandas.Dataframe Dataframe with information of the reference in AIMEC e.g. reference_sRunout of the polygon - cfgOpt: configparser.ConfigParser - Config parser of the runOptimisationCfg.ini file + cfg: configparser.ConfigParser + Config parser that contains values of the loss function, either runOptimisationCfg.ini or runMorrisSA.ini file Returns ------- @@ -188,8 +188,8 @@ def addLossMetrics(df, referenceDF, cfgOpt): df["f1_score"] = np.where(denomF1 != 0, 2.0 * df['precision'] * df['recall'] / denomF1, 0.0) # Tversky score = TP / (TP + alpha * FP + beta * FN), gives penalty to overshoot --> alpha - alpha = float(cfgOpt['LOSS_PARAMETERS']['tverskyAlpha']) - beta = float(cfgOpt['LOSS_PARAMETERS']['tverskyBeta']) + alpha = float(cfg['LOSS_PARAMETERS']['tverskyAlpha']) + beta = float(cfg['LOSS_PARAMETERS']['tverskyBeta']) denomTversky = TP + alpha * FP + beta * FN df["tversky_score"] = np.where(denomTversky != 0, TP / denomTversky, 0.0) @@ -203,15 +203,15 @@ def addLossMetrics(df, referenceDF, cfgOpt): df['runoutRMSENormalised'] = runoutRMSE / sRunoutRef # 0 is good, 1 is bad # Weights - wTversky = float(cfgOpt['LOSS_PARAMETERS']['weightTversky']) - wRunout = float(cfgOpt['LOSS_PARAMETERS']['weightRunout']) + wTversky = float(cfg['LOSS_PARAMETERS']['weightTversky']) + wRunout = float(cfg['LOSS_PARAMETERS']['weightRunout']) df['optimisationVariable'] = ( wRunout * df['runoutRMSENormalised'].fillna(1) + wTversky * df['1-tversky'].fillna(1)) return df -def buildFinalDF(avalancheDir, varParList, cfgOpt): +def buildFinalDF(avalancheDir, varParList, cfg): """ Build the final merged DataFrame for a given avalanche. 
@@ -224,8 +224,8 @@ def buildFinalDF(avalancheDir, varParList, cfgOpt): Path of avalanche directory varParList: list of str List of parameter names that are varied - cfgOpt: configparser.ConfigParser - Config parser of the runOptimisationCfg.ini file + cfg: configparser.ConfigParser + Config parser that contains values of the loss function, either runOptimisationCfg.ini or runMorrisSA.ini file Returns ------- @@ -265,7 +265,7 @@ def buildFinalDF(avalancheDir, varParList, cfgOpt): df_merged = df_merged.merge(indicatorsDF, on="simName", how="left") # Add optimisation variables - finalDF = addLossMetrics(df_merged, referenceDF, cfgOpt) + finalDF = addLossMetrics(df_merged, referenceDF, cfg) return finalDF @@ -689,7 +689,7 @@ def writeCfgFiles(avalancheDir, paramSets, optimisationType, comModuleName, coun paramSets : dict or list of dict Parameter set(s) to write to config files optimisationType : str - Optimisation mode ('nonSeq' or 'seq'), stored in the config file + Optimisation mode ('nonseq' or 'seq'), stored in the config file for traceability. comModuleName : str Name of the computation module (e.g. "com8MoTPSA"). 
@@ -730,10 +730,7 @@ def writeCfgFiles(avalancheDir, paramSets, optimisationType, comModuleName, coun # Assign unique scenario ID and optimisation type for later identification cfgCom8MoTPSA['VISUALISATION']['scenario'] = str(idx) - if optimisationType == 'nonseq': - cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'nonSeq' - else: - cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = 'EI/LCB' + cfgCom8MoTPSA["VISUALISATION"]["sampleMethod"] = optimisationType # Write configuration file to disk cfgF = cfgPath / f"{idx}_{comModuleName}Cfg.ini" @@ -819,11 +816,19 @@ def loadVariationData(cfgOpt, outDir, avaDir): # Load SA data and define how much parameters should be optimized (variation bounds included) SiDFSort = pd.read_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") topN = int(cfgOpt['PARAM_BOUNDS']['topN']) + n_available = len(SiDFSort['Parameter']) + # Check if enougth input parameters are available + if topN > n_available: + message = ( + f"Invalid number of parameters topN={topN}. Only {n_available} parameters available from Morris sensitivity" + f" analysis." + ) + raise ValueError(message) paramSelected = list(SiDFSort['Parameter'][:topN]) paramBounds = dict(zip(paramSelected, SiDFSort['bounds'])) # Scenario 2: Morris is not run prior - else: + elif scenario == 2: # Load variation data with bounds from pickle file inDir2 = pathlib.Path(avaDir, 'Outputs', "ana4Stats") paramValuesD = pd.read_pickle(inDir2 / "paramValuesD.pickle") @@ -832,6 +837,11 @@ def loadVariationData(cfgOpt, outDir, avaDir): name: (float(bounds[0]), float(bounds[1])) for name, bounds in zip(paramValuesD["names"], paramValuesD["bounds"]) } + else: + message = f"Unknown scenario '{scenario}' for variation data. Expected 1 or 2." 
+ log.error(message) + raise ValueError(message) + return paramBounds, paramSelected diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py index 6c6892366..f82ed0618 100644 --- a/avaframe/ana6Optimisation/runMorrisSA.py +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -36,6 +36,12 @@ def runMorrisSA(): # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, varParList, cfgMorrisSA) + # Check if there is simulation in finalDF + nAvailable = len(finalDF) + if nAvailable == 0: + message = "No simulations found in finalDF." + raise RuntimeError(message) + # Use only morris samples morrisDF = finalDF[finalDF['sampleMethods'] == 'morris'].copy(deep=True) # Set order as index @@ -91,7 +97,16 @@ def runMorrisSA(): # Create df with parameters and the loss function for summary statistics paramLossDF, paramLossScaledDF = optimisationUtils.createDFParameterLoss(morrisDF, SiDFSort['Parameter']) + + # Raise error if topN is bigger than available model runs N = int(cfgMorrisSA['MORRIS SA']['N']) + if N > nAvailable: + message = ( + f"Invalid configuration: Requested top {N} simulations, but only {nAvailable} Morris simulations are " + "available." 
+ ) + raise ValueError(message) + paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:N] # Save mean values of best input parameters as csv date = datetime.now().strftime("%Y%m%d") diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index 463ca392f..d13a8667e 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -1,6 +1,7 @@ import sys import pathlib import pickle +import pandas as pd from avaframe.in3Utils import cfgUtils, logUtils from avaframe.in3Utils import fileHandlerUtils as fU @@ -51,8 +52,15 @@ def runOptimisation(): # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) + # Check if there is simulation in finalDF + nAvailable = len(finalDF) + if nAvailable == 0: + message = "No simulations found in finalDF." + log.error(message) + raise RuntimeError(message) + # ---------------------------------------------------------------------------------------------------------------------- - optimisationType = cfgOpt['OPTIMISATION']['optType'] + optimisationType = cfgOpt['OPTIMISATION']['optType'].lower() if optimisationType == 'nonseq': csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_NonSeq.csv" # Save sim with currently best y @@ -115,6 +123,11 @@ def runOptimisation(): countGoodSims = 0 bo_max_iterations = int(cfgOpt['OPTIMISATION']['bo_max_iterations']) + # Before the loop: start counter based on existing BO ("seq") simulations + # Important to avoid same order number across different BO runs + seq_orders = pd.to_numeric(finalDF.loc[finalDF["sampleMethods"] == "seq", "order"], errors="coerce") + start_counter = int(seq_orders.max() + 1) if seq_orders.notna().any() else 0 + for i in range(bo_max_iterations): # Create df with most important parameters and the loss function emulatorDF, emulatorScaledDF = 
optimisationUtils.createDFParameterLoss(finalDF, paramSelected) @@ -136,7 +149,7 @@ def runOptimisation(): # Generate and write config file cfgFiles, cfgPath = optimisationUtils.writeCfgFiles(avalancheDir, xBestDict, optimisationType, - comModuleName, counter=i) + comModuleName, counter=start_counter + i) # Perform com8MoTPSA simulation com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgPath) @@ -166,10 +179,15 @@ def runOptimisation(): n_top_samples = int(cfgOpt['OPTIMISATION']['n_model_top']) emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) - saveResults.BOConvergencePlot(finalDF, avaName, outDir) + saveResults.BOConvergencePlot(finalDF, avaName, outDir, cfgOpt) saveResults.BOBoxplot(emulatorDF, avaName, outDir, N=n_top_samples) saveResults.BOBoxplotNormalised(emulatorDF, paramBounds, avaName, outDir, N=n_top_samples) + else: + message = f"Unknown optimisation type '{optimisationType}'. Expected 'nonseq' or 'seq'." + log.error(message) + raise ValueError(message) + # Save pickle file of finalDF with open(outDir / f"{avaName}_finalDF.pickle", "wb") as fi: pickle.dump(finalDF, fi) diff --git a/avaframe/ana6Optimisation/runPlotMorrisConvergence.py b/avaframe/ana6Optimisation/runPlotMorrisConvergence.py index eca869a52..d2a3e0b1f 100644 --- a/avaframe/ana6Optimisation/runPlotMorrisConvergence.py +++ b/avaframe/ana6Optimisation/runPlotMorrisConvergence.py @@ -1,30 +1,37 @@ -""" -Run script for the Morris convergence plot. For usage read README_ana6.md. 
-""" import pathlib import avaframe.out3Plot.outAna6Plots as saveResults from avaframe.in3Utils import cfgUtils import optimisationUtils -# Load avalanche directory from general configuration file -cfgMain = cfgUtils.getGeneralConfig() -avalancheDir = cfgMain['MAIN']['avalancheDir'] -avaName = pathlib.Path(avalancheDir).name - -# Load morris config file -cfgDir = 'runMorrisSA.ini' -cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) - -# Load Morris sensitivity analysis results for convergence plot -SA_dfs, r_vals, outputs, outDir, reference_r = optimisationUtils.loadMorrisConvergenceData(cfgMorrisSA, avalancheDir, - avaName) - -# Top 7 parameters -saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=7, - outpath=outDir / f'{avaName}_MorrisSAConvergencePlotTop7.png', - title=f"{avaName} Convergence plot for top 7 parameters") -# All parameters -saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=None, - outpath=outDir / f'{avaName}_MorrisSAConvergencePlotAll.png', - title=f"{avaName} Convergence plot for all parameters") + +def runPlotMorrisConvergence(): + """ + Run script for the Morris convergence plot. For usage read README_ana6.md. 
+ """ + # Load avalanche directory from general configuration file + cfgMain = cfgUtils.getGeneralConfig() + avalancheDir = cfgMain['MAIN']['avalancheDir'] + avaName = pathlib.Path(avalancheDir).name + + # Load morris config file + cfgDir = 'runMorrisSA.ini' + cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) + + # Load Morris sensitivity analysis results for convergence plot + SA_dfs, r_vals, outputs, outDir, reference_r = optimisationUtils.loadMorrisConvergenceData(cfgMorrisSA, + avalancheDir, + avaName) + + # Top 7 parameters + saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=7, + outpath=outDir / f'{avaName}_MorrisSAConvergencePlotTop7.png', + title=f"{avaName} Convergence plot for top 7 parameters") + # All parameters + saveResults.plotMorrisConvergence(SA_dfs, r_vals, reference_r, k=None, + outpath=outDir / f'{avaName}_MorrisSAConvergencePlotAll.png', + title=f"{avaName} Convergence plot for all parameters") + + +if __name__ == '__main__': + runPlotMorrisConvergence() diff --git a/avaframe/com8MoTPSA/com8MoTPSA.py b/avaframe/com8MoTPSA/com8MoTPSA.py index 687efb06a..f1d7a48ea 100644 --- a/avaframe/com8MoTPSA/com8MoTPSA.py +++ b/avaframe/com8MoTPSA/com8MoTPSA.py @@ -90,6 +90,7 @@ def com8MoTPSAMain(cfgMain, cfgInfo=None, returnSimName=None): if returnSimName is not None and simDict: return next(iter(simDict)) + return None def copyRawToLayerPeakFiles(workDir, outputDirPeakFile): diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index a4922c516..334770938 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ b/avaframe/out3Plot/outAna6Plots.py @@ -94,10 +94,10 @@ def scatterplotSA(SiDF, avaName, outDir): for i, txt in enumerate(SiDF['Parameter']): plt.annotate(txt, (SiDF['mu_star'][i], SiDF['sigma'][i]), fontsize=9, xytext=(5, 5), textcoords='offset points') - # Label and title - plt.xlabel('mu_star (Einflussstärke)', fontsize=12) - plt.ylabel('sigma (Nichtlinearität / Interaktionen)', 
fontsize=12) - plt.title('Morris Sensitivitätsanalyse: mu_star vs sigma', fontsize=14) + # Labels and title + plt.xlabel("mu_star (Influence)", fontsize=12) + plt.ylabel("sigma (Nonlinearity / interactions)", fontsize=12) + plt.title("Morris sensitivity analysis: mu_star vs sigma", fontsize=14) plt.grid(True) # Save figure @@ -147,10 +147,10 @@ def scatterplotUncertaintySA(SiDF, avaName, outDir): fontsize=9 ) - # Axes and layout - plt.xlabel('mu_star (Einflussstärke)', fontsize=12) - plt.ylabel('sigma (Nichtlinearität / Interaktionen)', fontsize=12) - plt.title('Morris Sensitivitätsanalyse: mu_star vs sigma (mit Unsicherheit)', fontsize=14) + # Labels and title + plt.xlabel("mu_star (Influence strength)", fontsize=12) + plt.ylabel("sigma (Nonlinearity / interactions)", fontsize=12) + plt.title("Morris sensitivity analysis: mu_star vs sigma", fontsize=14) # Save figure # Include date, format: YYYYMMDD @@ -159,14 +159,12 @@ def scatterplotUncertaintySA(SiDF, avaName, outDir): plt.savefig(figName, dpi=300, bbox_inches="tight") -def BOConvergencePlot(finalDF, avaName, outDir): +def BOConvergencePlot(finalDF, avaName, outDir, cfgOpt): """ - This function visualises the evolution of the optimisation variable - (loss) across different sampling phases: - - 1. Latin hypercube sampling - 2. Bayesian optimisation (EI/LCB) - 3. Optional non-sequential surrogate-based sampling + Visualises the evolution of the optimisation variable (loss) across sampling phases: + 1. Morris (scenario 1) OR Latin hypercube (scenario 2) + 2. Bayesian optimisation (EI/LCB) -> sampleMethods == "seq" + 3. Non-sequential surrogate-based sampling -> sampleMethods == "nonseq" Parameters ---------- @@ -179,57 +177,83 @@ def BOConvergencePlot(finalDF, avaName, outDir): outDir : pathlib.Path Directory where the convergence plot will be saved. + cfgOpt: configparser.ConfigParser + Configuration object containing the section 'PARAM_BOUNDS'. 
""" - # Color palette - c_best = '#4e79a7' - c_bo = '#59a14f' - c_lhs = '#76b7b2' - c_nonseq = '#e15759' + # Read scenario flag + scenario = int(cfgOpt["PARAM_BOUNDS"]["scenario"]) - df = finalDF.dropna( - subset=["sampleMethods", "order", "optimisationVariable"] - ).copy() + # Color palette + c_best = "#4e79a7" # Blue + c_bo = "#59a14f" # Green + c_lhs = "#76b7b2" # Turquoise + c_nonseq = "#e15759" # Red + c_morris = "#edc948" # Yellow + + df = finalDF.dropna(subset=["sampleMethods", "order", "optimisationVariable"]).copy() + + # Decide which method represents the initial sampling phase + if scenario == 1: + initial_method = "morris" + initial_label = "Morris" + initial_color = c_morris + elif scenario == 2: + initial_method = "latin" + initial_label = "LatinHypercube" + initial_color = c_lhs + else: + message = ( + f"Unknown scenario '{scenario}' in cfgOpt['PARAM_BOUNDS']['scenario']. " + "Expected 1 (Morris) or 2 (Latin hypercube)." + ) + raise ValueError(message) # Split by sampling method - latin = df[df["sampleMethods"] == "latin"].sort_values("order") - bo = df[df["sampleMethods"] == "EI/LCB"].sort_values("order") - nonseq = df[df["sampleMethods"] == "nonSeq"].sort_values("order") + initial = df[df["sampleMethods"] == initial_method].sort_values("order") + bo = df[df["sampleMethods"] == "seq"].sort_values("order") + nonseq = df[df["sampleMethods"] == "nonseq"].sort_values("order") - # Iteration axis + # Iteration axis: make phases contiguous on x-axis current_offset = 0 - if not latin.empty: - latin["iter"] = latin["order"] - current_offset = latin["iter"].max() + 1 + if not initial.empty: + initial = initial.copy() + initial["iter"] = initial["order"] + current_offset = int(initial["iter"].max()) + 1 if not bo.empty: + bo = bo.copy() bo["iter"] = bo["order"] + current_offset - current_offset = bo["iter"].max() + 1 + current_offset = int(bo["iter"].max()) + 1 if not nonseq.empty: nonseq = nonseq.sort_index().copy() nonseq["iter"] = np.arange(current_offset, 
current_offset + len(nonseq)) - current_offset = nonseq["iter"].max() + 1 + current_offset = int(nonseq["iter"].max()) + 1 - # Combine for best-so-far - all_parts = [latin, bo] + # Combine for best-so-far curve + parts = [initial, bo] if not nonseq.empty: - all_parts.append(nonseq) + parts.append(nonseq) - all_df = pd.concat(all_parts).sort_values("iter") + if not any(not p.empty for p in parts): + message = "No data available to plot (all sampling phases are empty after filtering)." + raise ValueError(message) + + all_df = pd.concat([p for p in parts if not p.empty], ignore_index=True).sort_values("iter") all_df["best_so_far"] = all_df["optimisationVariable"].cummin() # Plot fig, ax = plt.subplots(figsize=(9, 5)) - if not latin.empty: + if not initial.empty: ax.scatter( - latin["iter"], - latin["optimisationVariable"], + initial["iter"], + initial["optimisationVariable"], s=35, - alpha=0.7, - color=c_lhs, - label="Latin hypercube" + alpha=0.75, + color=initial_color, + label=initial_label, ) if not bo.empty: @@ -239,7 +263,7 @@ def BOConvergencePlot(finalDF, avaName, outDir): s=40, alpha=0.85, color=c_bo, - label="Bayesian optimisation (EI/LCB)" + label="Bayesian optimisation (EI/LCB)", ) if not nonseq.empty: @@ -249,7 +273,7 @@ def BOConvergencePlot(finalDF, avaName, outDir): s=40, alpha=0.85, color=c_nonseq, - label="Non-sequential sampling" + label="Non-sequential sampling", ) ax.plot( @@ -257,25 +281,26 @@ def BOConvergencePlot(finalDF, avaName, outDir): all_df["best_so_far"], linewidth=1.5, color=c_best, - label="Best-so-far" + label="Best-so-far", ) - # Add separator lines between sampling phases (if applicable) - if not latin.empty: - ax.axvline(latin["iter"].max() + 0.5, linestyle="--", linewidth=1, color="black") - if not bo.empty: - ax.axvline(bo["iter"].max() + 0.5, linestyle="--", linewidth=1, color="black") + # Separator lines (initial is assumed to exist) + if (not bo.empty) or (not nonseq.empty): + ax.axvline(initial["iter"].max() + 0.5, 
linestyle="--", linewidth=0.7, color="black") + + if (not bo.empty) and (not nonseq.empty): + ax.axvline(bo["iter"].max() + 0.5, linestyle="--", linewidth=0.7, color="black") ax.set_xlabel("Iteration") ax.set_ylabel("Optimisation variable (loss)") - ax.set_title("Convergence: Latin hypercube → Bayesian optimisation") - ax.legend(frameon=False, loc='best') + ax.set_title(f"Convergence: {initial_label} → Bayesian optimisation") + ax.legend(frameon=False, loc="best") fig.tight_layout() # Save figure date = datetime.now().strftime("%Y%m%d") - figName = f"{outDir}/{avaName}_BOConvergence_{date}.png" + figName = f"{outDir}/{avaName}_BOConvergence_{initial_label}_{date}.png" fig.savefig(figName, dpi=300, bbox_inches="tight") plt.close(fig) @@ -620,7 +645,16 @@ def _get_optvar_for_sim(sim_name, label): best_idx = finalDF["optimisationVariable"].idxmin() + # Raise error if topN is bigger than available model runs n_model_top = int(cfgOpt["OPTIMISATION"]["n_model_top"]) + nAvailable = len(finalDF.index) + if n_model_top > nAvailable: + message = ( + f"Invalid configuration: Requested top {n_model_top} simulations, but only {nAvailable} simulations are " + "available." + ) + raise ValueError(message) + topN = finalDF.nsmallest(n_model_top, "optimisationVariable") df_topN = summary_table(topN, paramSelected, best_row=finalDF.loc[best_idx]) From 67a588620c24cc4adff05458760405241ce15bd5 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Mon, 9 Mar 2026 12:27:33 +0100 Subject: [PATCH 14/20] Add description of Loss function to README_ana6.md. 
--- avaframe/ana6Optimisation/README_ana6.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/avaframe/ana6Optimisation/README_ana6.md b/avaframe/ana6Optimisation/README_ana6.md index 473e19b1e..979064175 100644 --- a/avaframe/ana6Optimisation/README_ana6.md +++ b/avaframe/ana6Optimisation/README_ana6.md @@ -16,7 +16,7 @@ The module contains the following files: --- ## Workflow -### Reference Data and Working Directory +### Working Directory All scripts must be executed within the directory: `avaframe/ana6Optimisation` @@ -24,7 +24,25 @@ In `avaframeCfg.ini`, the avalanche reference directory (`avalancheDir`) must in This ensures correct relative path resolution within the AvaFrame project structure. -To compute goodness-of-fit metrics between reference and simulation results and to perform AIMEC analysis, the following reference data must be provided in: `avaframe/data//Inputs` +--- + +### Loss Function + +The optimisation compares avalanche simulations with a reference runout polygon stored in `REFDATA`. Model performance is evaluated using a weighted loss function combining: + +- a modified Tversky score *(1 − Tversky)* +- the normalized RMSE of the runout length between simulation and reference + +The Tversky score is computed from areal indicators (TP, FP,FN) within a predefined cropshape. The areal indicators are calculated using `runPlotAreaRefDiffs.py`. Settings in `runMorrisSACfg.ini` and `runOptimisationCfg.ini`. + +The normalized runout length is computed using the AIMEC analysis implemented in `ana3AIMEC.py`. Settings in `ana3AIMECCfg.ini`. + +In `ana3AIMECCfg.ini`, the parameter `runoutLayer` defines which avalanche layer is used for the analysis (e.g. `L1` or `L2`). This selection is important, as the entire evaluation workflow (including AIMEC analysis, optimisation, and Morris sensitivity analysis) is performed using this selected layer. 
+ +--- + +### Reference Data +To compute these goodness-of-fit metrics and to perform the AIMEC analysis, the following data must be provided in: `avaframe/data//Inputs`. The required folder structure is: Folder: @@ -54,7 +72,7 @@ The Morris sensitivity analysis provides a ranking of input parameters based on Before running `runMorrisSA.py`, the following step is required prior: -- Execute `runAna4ProbAnaCom8MoTPSA` +- Execute `runAna4ProbAnaCom8MoTPSA.py` - In `probAnaCfg.ini`: - Set the sampling method to `'morris'` - Define the number of Morris trajectories (`nSample`) From da3f6db32025835efa4610c64aefbf51aba5a2df Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Mon, 9 Mar 2026 12:27:33 +0100 Subject: [PATCH 15/20] Add description of Loss function to README_ana6.md and add parameter names from paper to com8MoTPSACfg.ini. --- avaframe/ana6Optimisation/README_ana6.md | 26 ++++++++++++++++++++---- avaframe/com8MoTPSA/com8MoTPSACfg.ini | 17 +++++++++++++++- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/avaframe/ana6Optimisation/README_ana6.md b/avaframe/ana6Optimisation/README_ana6.md index 473e19b1e..29cd59d87 100644 --- a/avaframe/ana6Optimisation/README_ana6.md +++ b/avaframe/ana6Optimisation/README_ana6.md @@ -16,15 +16,33 @@ The module contains the following files: --- ## Workflow -### Reference Data and Working Directory +### Working Directory -All scripts must be executed within the directory: `avaframe/ana6Optimisation` +The above mentioned runScripts must be executed within the directory: `avaframe/ana6Optimisation` In `avaframeCfg.ini`, the avalanche reference directory (`avalancheDir`) must include the suffix `../`, for example: `../data/avaFleisskar` This ensures correct relative path resolution within the AvaFrame project structure. 
-To compute goodness-of-fit metrics between reference and simulation results and to perform AIMEC analysis, the following reference data must be provided in: `avaframe/data//Inputs` +--- + +### Loss Function + +The optimisation compares avalanche simulations with a reference runout polygon stored in `REFDATA`. Model performance is evaluated using a weighted loss function combining: + +- a modified Tversky score *(1 − Tversky)* +- the normalized RMSE of the runout length between simulation and reference + +The Tversky score is computed from areal indicators (TP, FP,FN) within a predefined cropshape. The areal indicators are calculated using `runPlotAreaRefDiffs.py`. Settings in `runMorrisSACfg.ini` and `runOptimisationCfg.ini`. + +The normalized runout length is computed using the AIMEC analysis implemented in `ana3AIMEC.py`. Settings in `ana3AIMECCfg.ini`. + +In `ana3AIMECCfg.ini` and `probAnaCfg.ini`, the parameter `runoutLayer` and `layer` define which avalanche layer is used for the analysis (e.g. `L1` or `L2`). This selection is important, as the entire evaluation workflow (including AIMEC analysis, optimisation, and Morris sensitivity analysis) is performed using this selected layer. + +--- + +### Reference Data +To compute these goodness-of-fit metrics and to perform the AIMEC analysis, the following data must be provided in: `avaframe/data//Inputs`. 
The required folder structure is: Folder: @@ -54,7 +72,7 @@ The Morris sensitivity analysis provides a ranking of input parameters based on Before running `runMorrisSA.py`, the following step is required prior: -- Execute `runAna4ProbAnaCom8MoTPSA` +- Execute `runAna4ProbAnaCom8MoTPSA.py` - In `probAnaCfg.ini`: - Set the sampling method to `'morris'` - Define the number of Morris trajectories (`nSample`) diff --git a/avaframe/com8MoTPSA/com8MoTPSACfg.ini b/avaframe/com8MoTPSA/com8MoTPSACfg.ini index cf0a65075..71ed10960 100644 --- a/avaframe/com8MoTPSA/com8MoTPSACfg.ini +++ b/avaframe/com8MoTPSA/com8MoTPSACfg.ini @@ -162,20 +162,28 @@ Output format = ESRI_ASCII_Grid [Physical_parameters] Gravitational acceleration (m/s^2) = 9.81 +# Flow density for dense layer (rho) Density (kg/m^3) = 250.0 Rheology = Voellmy Parameters = auto +# Dry-friction coefficient (mu) Dry-friction coefficient (-) = 0.400 -Turbulent drag coefficient (-) = 0.0010 +# Turbulent drag coefficient (k_01) +Turbulent drag coefficient (-) = 0.0010 Effective drag height (m) = 0.0 Centrifugal effects = yes Passive earth-pressure coeff. (-) = 1.0 +# Basal drag coefficient (k_02) Basal drag coeff. 0-2 (-) = 0.008 +# Basal drag coefficient (k_12) Basal drag coeff. 1-2 (-) = 0.04 +# Top drag coefficient k_a2 Top drag coeff.
(-) = 0.0001 +# Parabolic shape function of concentration (f_c) Conc_L2_Prof_Coeff_0 (-) = 1.3333 Conc_L2_Prof_Coeff_1 (-) = -0.66667 Conc_L2_Prof_Coeff_2 (-) = 0.0 +# Parabolic shape function of velocity (f_u) Speed_L2_Prof_Coeff_0 (-) = 1.4 Speed_L2_Prof_Coeff_1 (-) = 0.13333 Speed_L2_Prof_Coeff_2 (-) = -1.4 @@ -192,15 +200,22 @@ Entrainment L2 = none Erosion coefficient L1 (-) = 0.0 Bed strength profile = constant Bed friction coefficient (-) = 0.0 +# Bed density (rho_b) Bed density (kg/m^3) = 140.0 Deposition = yes +# Deposition density for dense layer (rho_d) Deposition density L1 (kg/m^3) = 400.0 Suspension model = TJSM +# Shear strength dense layer (tau_s) Avalanche shear strength (Pa) = 0.0 +# Shear strength deposit tau_d Avalanche shear strength deposit (Pa) = 0.0 +# Decay coefficient of the snow avalanche shear strength for suspension process (tau_su) Decay coeff. snow s. s. suspension (-) = 1.0 +# Decay coefficient of the snow avalanche shear strength for deposition process (tau_du) Decay coeff. snow s. s. deposition (-) = 1.0 Entrainment coeff. 
m12 (-) = 0.0 +# Settling speed of powder snow layer to dense layer (w_s) Deposition rate 21 (m/s) = 0.50 Constant density L1 = yes Evolving geometry = no From 9c1940a1cc6f80001adee016f1da8d21ba80cf03 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Tue, 10 Mar 2026 18:46:19 +0100 Subject: [PATCH 16/20] Add more info and layer handling in Cfg.ini files,swap scenario 1 and 2 (1 more important), --- .../ana6Optimisation/optimisationUtils.py | 46 +++++++++---------- avaframe/ana6Optimisation/runMorrisSACfg.ini | 23 ++++++---- .../ana6Optimisation/runOptimisationCfg.ini | 45 ++++++++++++------ 3 files changed, 69 insertions(+), 45 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index 4803fbc37..1c8dd63fa 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -255,10 +255,9 @@ def buildFinalDF(avalancheDir, varParList, cfg): arealIndicatorDir = pathlib.Path(avalancheDir, 'Outputs', 'out1Peak', 'arealIndicators.pkl') indicatorsDF = readArealIndicators(arealIndicatorDir) - # remove rows ending with '_L1' - indicatorsDF = indicatorsDF.loc[~indicatorsDF["simName"].str.endswith("_L1")].copy() - # remove trailing '_L2' for merging on simName - indicatorsDF["simName"] = (indicatorsDF["simName"].str.replace(r"_L2$", "", regex=True)) + # Remove layer suffix _L1 or _L2 from simName for merging, the layer is provided by cfg['GENERAL']['layer']. + layer = cfg['GENERAL']['layer'] + indicatorsDF["simName"] = indicatorsDF["simName"].str.replace(fr"_{layer}$", "", regex=True) # Merge df's df_merged = pd.merge(paramSetDF, df_aimec, on='simName', how='inner') @@ -775,19 +774,19 @@ def loadVariationData(cfgOpt, outDir, avaDir): Load parameter bounds and selected parameters for optimisation. 
Two execution modes are supported, controlled via cfgOpt['PARAM_BOUNDS']['scenario']: - Scenario 1 (Morris pre-run): - - A Morris sensitivity analysis has already been executed. - - Ranked parameters and their bounds are loaded from - 'sa_parameters_bounds.pkl'. - - The top-N most influential parameters are selected for optimisation. - - Scenario 2 (Manual definition): + Scenario 1 (Manual definition): - No prior Morris screening. - Parameter names and corresponding bounds are loaded from a previously saved pickle file generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is therefore not defined within this function, but is determined by the configuration specified in ``probAnaCfg.ini``. The ``probAnaCfg.ini`` file contains the settings used to generate the initial sample set, including parameter ranges and variation rules. + Scenario 2 (Morris pre-run): + - A Morris sensitivity analysis has already been executed. + - Ranked parameters and their bounds are loaded from + 'sa_parameters_bounds.pkl'. + - The top-N most influential parameters are selected for optimisation. 
+ Parameters ---------- @@ -811,13 +810,24 @@ def loadVariationData(cfgOpt, outDir, avaDir): avaName = avaDir.split('/')[-1] - # Scenario 1: Morris run prior + # Scenario 1: Morris is not run prior if scenario == 1: + # Load variation data with bounds from pickle file + inDir2 = pathlib.Path(avaDir, 'Outputs', "ana4Stats") + paramValuesD = pd.read_pickle(inDir2 / "paramValuesD.pickle") + paramSelected = paramValuesD['names'] + paramBounds = { + name: (float(bounds[0]), float(bounds[1])) + for name, bounds in zip(paramValuesD["names"], paramValuesD["bounds"]) + } + + # Scenario 2: Morris run prior + elif scenario == 2: # Load SA data and define how much parameters should be optimized (variation bounds included) SiDFSort = pd.read_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") topN = int(cfgOpt['PARAM_BOUNDS']['topN']) n_available = len(SiDFSort['Parameter']) - # Check if enougth input parameters are available + # Check if enough input parameters are available if topN > n_available: message = ( f"Invalid number of parameters topN={topN}. Only {n_available} parameters available from Morris sensitivity" @@ -827,16 +837,6 @@ def loadVariationData(cfgOpt, outDir, avaDir): paramSelected = list(SiDFSort['Parameter'][:topN]) paramBounds = dict(zip(paramSelected, SiDFSort['bounds'])) - # Scenario 2: Morris is not run prior - elif scenario == 2: - # Load variation data with bounds from pickle file - inDir2 = pathlib.Path(avaDir, 'Outputs', "ana4Stats") - paramValuesD = pd.read_pickle(inDir2 / "paramValuesD.pickle") - paramSelected = paramValuesD['names'] - paramBounds = { - name: (float(bounds[0]), float(bounds[1])) - for name, bounds in zip(paramValuesD["names"], paramValuesD["bounds"]) - } else: message = f"Unknown scenario '{scenario}' for variation data. Expected 1 or 2." 
log.error(message) diff --git a/avaframe/ana6Optimisation/runMorrisSACfg.ini b/avaframe/ana6Optimisation/runMorrisSACfg.ini index eb94c9b68..431f62e8e 100644 --- a/avaframe/ana6Optimisation/runMorrisSACfg.ini +++ b/avaframe/ana6Optimisation/runMorrisSACfg.ini @@ -5,21 +5,28 @@ # SA ... sensitivity analysis - [GENERAL] # USER input for running and plotting a comparison of simulation result to reference polygon +# These parameters correspond to the settings in runPlotAreaRefDiffs.py resType = ppr thresholdValueSimulation = 1 modName = com8MoTPSA +# layer to use for analysis (e.g. L1, L2) +layer = L2 + +# Output Settings: # in this folder the results will be saved, avalancheDir + this folder resultDir = Outputs/ana6MorrisSA [LOSS_PARAMETERS] -# alpha gives penalty if sim. overshoot the ref. (FP) -# beta gives penalty if sim. comes short compared to the ref. (FN) -tverskyAlpha = 2 -tverskyBeta = 1 -# Loss function is kombination of TverskyScore * weightTversky + RunoutNormalised * weight runout +# The loss function evaluates the agreement between simulation results and reference data. It is defined as a weighted sum of a modified Tversky score (1 - Tversky score) and the normalized runout length difference. Areal indicators are calculated only within the avalanche impact area, which is determined by the reference polygon and a cropshape polygon. 
+ +# alpha gives penalty if simulation overshoots the reference (FP) +# beta gives penalty if simulation falls short of the reference (FN) +tverskyAlpha = 2 +tverskyBeta = 1 + +# Loss function: (1-TverskyScore) * weightTversky + RunoutNormalised * weightRunout weightTversky = 0.25 weightRunout = 0.75 @@ -27,8 +34,8 @@ weightRunout = 0.75 # confidence level and number of levels used for SA conf_level = 0.95 num_levels = 6 -# number of best morris samples for statistics -N = 10 +# number of best morris simulations used for statistics (topN < available model runs) +topN = 10 [MORRIS_CONVERGENCE] diff --git a/avaframe/ana6Optimisation/runOptimisationCfg.ini b/avaframe/ana6Optimisation/runOptimisationCfg.ini index e8b568091..9d1f66aa2 100644 --- a/avaframe/ana6Optimisation/runOptimisationCfg.ini +++ b/avaframe/ana6Optimisation/runOptimisationCfg.ini @@ -4,43 +4,58 @@ # (2) avaDirectory in avaframeCfg.ini need ../ as prefix # BO ... bayesian optimisation -# ei ... expected improvement +# EI ... expected improvement [GENERAL] # USER input for running and plotting a comparison of simulation result to reference polygon +# These parameters correspond to the settings in runPlotAreaRefDiffs.py resType = ppr thresholdValueSimulation = 1 modName = com8MoTPSA +# layer to use for analysis (e.g. L1, L2) +layer = L2 + +# Output Settings: # in this folder the results will be saved, avalancheDir + this folder resultDir = Outputs/ana6Optimisation [PARAM_BOUNDS] # 2 scenarios: choose 1 or 2 +# (1): morris is not run prior, this is the preferred scenario, input parameters and their bounds are read from a previously saved pickle file generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is determined by the configuration specified in ``probAnaCfg.ini``. 
+# (2): morris is run prior, in this case, a dataframe of ranked input parameters is already saved by runMorrisSA.py as pickle file. The optimisation is then based on morris samples; this is experimental, since Morris samples do not provide optimal coverage of the input space. +# The user only needs to determine how many input parameters (topN) to use for optimisation. scenario = 1 -#(1): morris is run prior, then dataframe of ranked input parameters is already saved by runMorris.py as pickle file, user only need to determine how much input paramters to use for optimisation -topN = 3 -# (2): morris is not run prior, input parameters and their bounds are read from a previously saved pickle file generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is determined by the configuration specified in ``probAnaCfg.ini``. + +# topN is only relevant for scenario 2: number of top ranked input parameters to use for optimisation +topN = 7 [LOSS_PARAMETERS] -# alpha gives penalty if sim. overshoot the ref. (FP) -# beta gives penalty if sim. comes short compared to the ref. (FN) +# The loss function evaluates the agreement between simulation results and reference data. It is defined as a weighted sum of a modified Tversky score (1 - Tversky score) and the normalized runout length difference. Areal indicators are calculated only within the avalanche impact area, which is determined by the reference polygon and a cropshape polygon. + +# alpha gives penalty if simulation overshoots the reference (FP) +# beta gives penalty if simulation falls short of the reference (FN) tverskyAlpha = 2 tverskyBeta = 1 +# Loss function: (1-TverskyScore) * weightTversky + RunoutNormalised * weightRunout weightTversky = 0.25 weightRunout = 0.75 [OPTIMISATION] +# Surrogate-based optimisation using a Gaussian Process (GP) surrogate. +# Non-sequential optimisation evaluates a fixed set of Latin Hypercube simulations to train the surrogate and predict the loss. 
+# Bayesian optimisation also uses the GP surrogate but sequentially selects new simulations using the Expected Improvement acquisition function. + # Type of optimisation: seq or nonseq optType = nonseq # define the smoothness of the matern kernel of the surrogate, e.g.: 0.5, 1.5, 2.5 matern_nu = 2.5 # define k for k-fold cross validation -k = 5 +k = 5 # sampling seed for generation of Latin Hypercube samples used for loss-prediction with surrogate seed = 12345 # number of LH samples @@ -48,17 +63,19 @@ n_lhs = 1000000 # determine the number N of surrogate's best simulations used for statistics n_surrogate_top = 100 # determine the number N of models best simulations used for statistics (n < available model runs) -n_model_top = 6 +n_model_top = 10 # settings for seq: -# xi controls the exploration–exploitation balance in Expected Improvement -# Increasing xi encourages exploration (sampling uncertain regions) -# Decreasing xi favors exploitation (sampling near current optimum) +# xi controls the exploration–exploitation balance in the Expected Improvement acquisition function. +# Increasing xi (e.g., xi = 0.01) encourages exploration (sampling uncertain regions). +# Decreasing xi (e.g., xi = 0.0) favors exploitation (sampling near the current optimum). +# In practice, xi is typically chosen as a small non-negative value (e.g., 0.0–0.1). xi = 0.0 -# if ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is reached, optimisation stops + +# If EI falls below the threshold, the simulation is counted as 'good'. The sequential optimisation stops once the specified number of good simulations is reached. Otherwise, the optimisation terminates after bo_max_iterations. 
eiThreshold = 0.01 -numberOfGoodSimulations = 5 -bo_max_iterations = 20 +numberOfGoodSimulations = 10 +bo_max_iterations = 40 From e0c03072b14363200890f1fd6c23bfec6a97a2bb Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 12 Mar 2026 09:33:01 +0100 Subject: [PATCH 17/20] Revise README_ana6.md --- avaframe/ana6Optimisation/README_ana6.md | 152 ++++++++++++++++------- 1 file changed, 110 insertions(+), 42 deletions(-) diff --git a/avaframe/ana6Optimisation/README_ana6.md b/avaframe/ana6Optimisation/README_ana6.md index 29cd59d87..fe2b84541 100644 --- a/avaframe/ana6Optimisation/README_ana6.md +++ b/avaframe/ana6Optimisation/README_ana6.md @@ -1,6 +1,9 @@ # ana6 – Sensitivity Analysis & Optimisation -The `ana6Optimisation` module provides tools for performing Morris sensitivity analysis and parameter optimisation within the AvaFrame workflow. It supports input parameter ranking, convergence analysis of sensitivity indices and surrogate-based optimisation strategies. The module can be used either sequentially (Morris analysis followed by optimisation) or independently for direct optimisation. +The `ana6Optimisation` module provides tools for performing Morris sensitivity analysis and parameter optimisation +within the AvaFrame workflow. It supports input parameter ranking, convergence analysis of sensitivity indices and +surrogate-based optimisation strategies. The module can be used either sequentially (Morris analysis followed by +optimisation) or independently for direct optimisation. 
--- @@ -16,67 +19,88 @@ The module contains the following files: --- ## Workflow + ### Working Directory The above mentioned runScripts must be executed within the directory: `avaframe/ana6Optimisation` -In `avaframeCfg.ini`, the avalanche reference directory (`avalancheDir`) must include the suffix `../`, for example: `../data/avaFleisskar` +In `avaframeCfg.ini`, the avalanche reference directory (`avalancheDir`) must include the prefix `../`, for example: +`../data/avaFleisskar` This ensures correct relative path resolution within the AvaFrame project structure. --- -### Loss Function +### Loss Function and Configuration Settings -The optimisation compares avalanche simulations with a reference runout polygon stored in `REFDATA`. Model performance is evaluated using a weighted loss function combining: +The optimisation compares avalanche simulations with a reference runout polygon. Model performance is evaluated using a +weighted loss function combining: - a modified Tversky score *(1 − Tversky)* - the normalized RMSE of the runout length between simulation and reference -The Tversky score is computed from areal indicators (TP, FP,FN) within a predefined cropshape. The areal indicators are calculated using `runPlotAreaRefDiffs.py`. Settings in `runMorrisSACfg.ini` and `runOptimisationCfg.ini`. +The Tversky score is computed from areal indicators (TP, FP, FN) within a predefined cropshape. The areal indicators are +calculated using `runPlotAreaRefDiffs.py`. Settings in `runMorrisSACfg.ini` and `runOptimisationCfg.ini`. + +The normalized runout length is computed using variables of the AIMEC analysis implemented in `ana3AIMEC.py`. Settings +are defined in `ana3AIMECCfg.ini`. -The normalized runout length is computed using the AIMEC analysis implemented in `ana3AIMEC.py`. Settings in `ana3AIMECCfg.ini`. 
+In `ana3AIMECCfg.ini`, `probAnaCfg.ini`, `runMorrisSACfg.ini` (for Morris SA) and `runOptimisationCfg.ini` (for +optimisation), the parameter runoutLayer or layer defines which avalanche layer is used for the analysis (e.g. L1 or +L2). For now, the selected layer must be specified consistently in all configuration files. This is important because +the entire evaluation workflow (including AIMEC analysis, optimisation, and Morris sensitivity analysis) is performed +using this layer. -In `ana3AIMECCfg.ini` and `probAnaCfg.ini`, the parameter `runoutLayer` and `layer` define which avalanche layer is used for the analysis (e.g. `L1` or `L2`). This selection is important, as the entire evaluation workflow (including AIMEC analysis, optimisation, and Morris sensitivity analysis) is performed using this selected layer. +For the optimisation workflow, the following parameters must additionally be set in `ana3AIMECCfg.ini`: + +- `resTypes = ppr` +- `anaMod = com8MoTPSA` +- `flagMass = False` (since `com8MoTPSA` currently does not produce a mass file) +- `includeReference = True` --- ### Reference Data -To compute these goodness-of-fit metrics and to perform the AIMEC analysis, the following data must be provided in: `avaframe/data//Inputs`. + +To compute these goodness-of-fit metrics and to perform the AIMEC analysis, the following data must be provided in: +`avaframe/data//Inputs`. The required folder structure is: Folder: + - **LINES** - Contains the AIMEC path. + Contains the AIMEC path as `path_aimec.shp` file. - **POLYGONS** - Contains Cropshape and defines the maximal extent of runout area that is used for calculating areal indicators. + Contains the cropshape and defines the maximal extent of runout area that is used for calculating areal indicators. + This shapefile must have the suffix `_cropshape.shp`. - **REFDATA** - Defines the runout area of the reference event. + Defines the runout area of the reference event. This shapefile must have the suffix `_POLY.shp`. 
- **REL** Defines the release area of the avalanche event. - + File: + - **Digital Elevation Model (DEM)** Must be placed directly in the `Inputs` directory and must cover the entire affected area. - -More Details here: https://docs.avaframe.org/en/latest/moduleCom1DFA.html + +More details here in the section `Inputs`: https://docs.avaframe.org/en/latest/moduleCom1DFA.html ___ ### Morris Sensitivity Analysis (MorrisSA) - + The Morris sensitivity analysis provides a ranking of input parameters based on their influence on the model response. Before running `runMorrisSA.py`, the following step is required prior: - Execute `runAna4ProbAnaCom8MoTPSA.py` - In `probAnaCfg.ini`: - - Set the sampling method to `'morris'` - - Define the number of Morris trajectories (`nSample`) - - Select the input parameters and define their variation bounds + - Set the sampling method to `'morris'` + - Define the number of Morris trajectories (`nSample`) + - Select the input parameters and define their variation bounds This step generates the required simulations and stores the sampled parameters and their bounds in a pickle file. @@ -89,23 +113,23 @@ This step generates the required simulations and stores the sampled parameters a **Outputs:** - Pickle file containing: - - Ranked input parameters - - Morris sensitivity indices - - Parameter bounds + - Ranked input parameters + - Morris sensitivity indices + - Parameter bounds - Visualisation plots of the sensitivity results --- ### Morris Convergence Analysis -The convergence analysis evaluates how the Morris sensitivity indices stabilise with increasing numbers of trajectories. Its purpose is to determine the minimum number of trajectories that yields robust results. +The convergence analysis evaluates how the Morris sensitivity indices stabilise with increasing numbers of trajectories. +Its purpose is to determine the minimum number of trajectories that yields robust results. 
**Requirements:** - Run `runAna4ProbAnaCom8MoTPSA` multiple times with different numbers of Morris trajectories - Rename Output folders afterwards with the following naming convention: OutputsR`` - where `` corresponds to the number of trajectories This process is computationally expensive, as it requires a large number of simulations. @@ -120,39 +144,83 @@ This process is computationally expensive, as it requires a large number of simu --- -### Optimisation +### Optimisation Strategies + +The optimisation process identifies the set of input parameters that yields the best agreement between simulation +results and a defined reference. “Best” is defined by the objective function implemented in the optimisation routine. +The optimisation problem is formulated as a minimisation of the loss function, where lower values indicate better +agreement between simulation results and the reference data. + +Two surrogate-based optimisation strategies are implemented. In both approaches, a Gaussian Process (GP) surrogate model +is used to approximate the loss function. The surrogate is trained using results from avalanche simulations and provides +predictions of the loss together with an estimate of the prediction uncertainty. + +#### Surrogate-based Non-sequential Optimisation + +In the non-sequential approach, a trained surrogate predicts the loss for a large number of parameter combinations +generated using Latin Hypercube Sampling (LHS). Parameter sets with the lowest predicted loss values are identified and +analysed statistically. Avalanche simulations are then performed for the best predicted parameter sets as well as for +the mean parameter values derived from the top-performing combinations. -The optimisation process identifies the set of input parameters that yields the best agreement between simulation results and a defined reference. "Best" is defined by the objective function implemented in the optimisation routine. 
+#### Surrogate-based Bayesian (Sequential) Optimisation -Optimisation can be performed in two ways: +In Bayesian optimisation, the GP surrogate model is updated iteratively. The procedure starts with a small initial set +of evaluated avalanche simulations. Based on these results, the surrogate model is trained and used to guide the +selection of new simulation points. The next parameter set is selected using the Expected Improvement (EI) acquisition +function. EI balances two objectives: -**With prior Morris analysis:** - - Parameter ranking is available - - Parameter bounds are already defined - - Execute `runOpmisiation.py` with scenario 1 in `runOptimisationCfg.ini` +- Exploitation – sampling regions where the surrogate predicts low loss values. +- Exploration – sampling regions with high predictive uncertainty. -**Without prior Morris analysis:** - - Execute `runAna4ProbAnaCom8MoTPSA.py` to generate some initial samples (for surrogate) - - In `probAnaCfg.ini`: - - Set the sampling method to `'latin'` - - Define the number of model runs (`nSample`) - - Select the input parameters and define their variation bounds - - Execute `runOpmisiation.py` with scenario 2 in `runOptimisationCfg.ini` +After each new avalanche simulation, the GP surrogate model is updated and the process is repeated. The optimisation +stops once a stopping criterion is reached (e.g. maximum number of iterations or very small EI values for several +iterations). -**Two optimisation strategies are implemented:** -- Surrogate-based non-sequential optimisation -- Surrogate-based Bayesian (sequential) optimisation +--- + +### Optimisation Workflow + +The optimisation can be performed using either non-sequential surrogate-based optimisation or sequential Bayesian +optimisation. The optimisation strategy can be selected in `runOptimisationCfg.ini` via the parameter `optType`. 
+ +Independently of the chosen optimisation strategy, the workflow can be configured in two ways depending on whether +Morris sensitivity analysis is used before: + +**Scenario 1: Without prior Morris analysis (recommended):** + +- Execute `runAna4ProbAnaCom8MoTPSA.py` to generate some initial samples (for surrogate) +- In `probAnaCfg.ini`: + - Set the sampling method to `'latin'` + - Define the number of model runs (`nSample`) + - Select the input parameters and define their variation bounds +- Execute `runOptimisation.py` with scenario 1 in `runOptimisationCfg.ini` + +This is the standard scenario, as Latin Hypercube Sampling provides good coverage of the parameter space, which is +important for training the surrogate model. + +**Scenario 2: With prior Morris analysis:** + +- Parameter ranking is available +- Parameter bounds are already defined +- Execute `runOptimisation.py` with scenario 2 in `runOptimisationCfg.ini` + +This option is mainly intended for experimental use. Morris samples are designed for sensitivity analysis and parameter +ranking, but they do not provide an optimal coverage of the parameter space for surrogate-based optimisation. + +--- **Outputs:** - Optimal parameter set -- Visualisation plots of the optimisations results and progress +- Visualisation plots of the optimisation results and progress --- ## Notes -- Performing Morris sensitivity analysis before optimisation is recommended to reduce the parameter space. +- Performing Morris sensitivity analysis before optimisation is recommended to reduce the parameter space. However, + using Morris samples directly for optimisation is not recommended, since they do not provide optimal coverage of the + input parameter space. - Convergence analysis significantly increases computational cost. - All workflows are controlled via `.ini` configuration files. 
\ No newline at end of file From 9c6432c435709f3ed111577fd66631bdcb2531fe Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 12 Mar 2026 12:34:33 +0100 Subject: [PATCH 18/20] Use cfg.getint(...) instead of int(cfg(...)), swap scenario 1 and 2 (consistent with runOptimisationCfg.ini) and add comments. --- .../ana6Optimisation/optimisationUtils.py | 32 +++++++++---------- avaframe/ana6Optimisation/runMorrisSA.py | 17 +++++----- avaframe/ana6Optimisation/runOptimisation.py | 8 ++--- avaframe/out3Plot/outAna6Plots.py | 16 +++++----- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index 1c8dd63fa..fe0bb79cf 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -47,7 +47,7 @@ def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC): # ToDo: to reduce comp. cost: run AIMEC and calcArealIndicators only for new simulations # Areal indicators resType = cfgOpt["GENERAL"]["resType"] - thresholdValueSimulation = float(cfgOpt["GENERAL"]["thresholdValueSimulation"]) + thresholdValueSimulation = cfgOpt.getfloat("GENERAL", "thresholdValueSimulation") modName = cfgOpt["GENERAL"]["modName"] runPlotAreaRefDiffs(resType, thresholdValueSimulation, modName) @@ -188,8 +188,8 @@ def addLossMetrics(df, referenceDF, cfg): df["f1_score"] = np.where(denomF1 != 0, 2.0 * df['precision'] * df['recall'] / denomF1, 0.0) # Tversky score = TP / (TP + alpha * FP + beta * FN), gives penalty to overshoot --> alpha - alpha = float(cfg['LOSS_PARAMETERS']['tverskyAlpha']) - beta = float(cfg['LOSS_PARAMETERS']['tverskyBeta']) + alpha = cfg.getfloat('LOSS_PARAMETERS', 'tverskyAlpha') + beta = cfg.getfloat('LOSS_PARAMETERS', 'tverskyBeta') denomTversky = TP + alpha * FP + beta * FN df["tversky_score"] = np.where(denomTversky != 0, TP / denomTversky, 0.0) @@ -203,8 +203,8 @@ def addLossMetrics(df, referenceDF, cfg): 
df['runoutRMSENormalised'] = runoutRMSE / sRunoutRef # 0 is good, 1 is bad # Weights - wTversky = float(cfg['LOSS_PARAMETERS']['weightTversky']) - wRunout = float(cfg['LOSS_PARAMETERS']['weightRunout']) + wTversky = cfg.getfloat('LOSS_PARAMETERS', 'weightTversky') + wRunout = cfg.getfloat('LOSS_PARAMETERS', 'weightRunout') df['optimisationVariable'] = ( wRunout * df['runoutRMSENormalised'].fillna(1) + wTversky * df['1-tversky'].fillna(1)) @@ -326,7 +326,7 @@ def fitSurrogate(df, cfgOpt): n_features = X.shape[1] # GP kernel with Matern-Covariance - matern_nu = float(cfgOpt['OPTIMISATION']['matern_nu']) + matern_nu = cfgOpt.getfloat('OPTIMISATION', 'matern_nu') kernel = ( ConstantKernel(1.0, (1e-3, 1e3)) # Output variance tells how strong Y varies * Matern(length_scale=np.ones(n_features), @@ -380,7 +380,7 @@ def KFoldCrossValidation(X, y, pipe, cfgOpt, outDir, avaName, pipeName): rmse_scorer = "neg_root_mean_squared_error" mae_scorer = "neg_mean_absolute_error" r2_scorer = "r2" - k = int(cfgOpt['OPTIMISATION']['k']) + k = cfgOpt.getint('OPTIMISATION', 'k') cv = KFold(n_splits=k, shuffle=True, random_state=0) scores = cross_validate( @@ -395,7 +395,7 @@ def KFoldCrossValidation(X, y, pipe, cfgOpt, outDir, avaName, pipeName): for c in neg_cols: scores[c] = -scores[c] # get smoothness parameter nu - matern_nu = float(cfgOpt['OPTIMISATION']['matern_nu']) + matern_nu = cfgOpt.getfloat('OPTIMISATION', 'matern_nu') row = { "experiment_name": pipeName, "n_samples": X.shape[0], @@ -455,8 +455,8 @@ def optimiseNonSeq(pipe, cfgOpt, paramBounds): d = bounds.shape[0] # Create LH samples - seed = int(cfgOpt['OPTIMISATION']['seed']) - n_lhs = int(cfgOpt['OPTIMISATION']['n_lhs']) + seed = cfgOpt.getint('OPTIMISATION', 'seed') + n_lhs = cfgOpt.getint('OPTIMISATION', 'n_lhs') sampler = qmc.LatinHypercube(d=d, seed=seed) sample = sampler.random(n=n_lhs) X0 = qmc.scale(sample, bounds[:, 0], bounds[:, 1]) @@ -465,7 +465,7 @@ def optimiseNonSeq(pipe, cfgOpt, paramBounds): mu, sigma = 
pipe.predict(X0, return_std=True) # Convert X0 to pandas df for analyze function df_candidates = pd.DataFrame(X0, columns=paramSelected) - n_top_samples = int(cfgOpt['OPTIMISATION']['n_surrogate_top']) + n_top_samples = cfgOpt.getint('OPTIMISATION', 'n_surrogate_top') topNStat, _ = analyzeTopCandidates(df_candidates, mu, sigma, paramSelected, N=n_top_samples) return topNStat @@ -647,8 +647,8 @@ def EINextPoint(pipe, y, paramBounds, cfgOpt): f_best = np.nanmin(y) # Create LH samples - seed = int(cfgOpt['OPTIMISATION']['seed']) - n_lhs = int(cfgOpt['OPTIMISATION']['n_lhs']) + seed = cfgOpt.getint('OPTIMISATION', 'seed') + n_lhs = cfgOpt.getint('OPTIMISATION', 'n_lhs') sampler = qmc.LatinHypercube(d=d, seed=seed) sample = sampler.random(n=n_lhs) X0 = qmc.scale(sample, bounds[:, 0], bounds[:, 1]) @@ -661,7 +661,7 @@ def EINextPoint(pipe, y, paramBounds, cfgOpt): log.info(" sigma mean=%.6f max=%.6f", sigma.mean(), sigma.max()) # EI or LCB for minimization - xi = float(cfgOpt['OPTIMISATION']['xi']) + xi = cfgOpt.getfloat('OPTIMISATION', 'xi') ei = expectedImprovement(mu, sigma, f_best, xi) lcb = lowerConfidenceBound(mu, sigma) @@ -806,7 +806,7 @@ def loadVariationData(cfgOpt, outDir, avaDir): List of selected parameter names used for optimisation. 
""" # Read scenario flag - scenario = int(cfgOpt['PARAM_BOUNDS']['scenario']) + scenario = cfgOpt.getint('PARAM_BOUNDS', 'scenario') avaName = avaDir.split('/')[-1] @@ -825,7 +825,7 @@ def loadVariationData(cfgOpt, outDir, avaDir): elif scenario == 2: # Load SA data and define how much parameters should be optimized (variation bounds included) SiDFSort = pd.read_pickle(outDir / f"{avaName}_sortedSAResultsWithBounds.pkl") - topN = int(cfgOpt['PARAM_BOUNDS']['topN']) + topN = cfgOpt.getint('PARAM_BOUNDS', 'topN') n_available = len(SiDFSort['Parameter']) # Check if enough input parameters are available if topN > n_available: diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py index f82ed0618..6319064fb 100644 --- a/avaframe/ana6Optimisation/runMorrisSA.py +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -63,11 +63,11 @@ def runMorrisSA(): problem, samples, Y, - conf_level=float(cfgMorrisSA['MORRIS SA']['conf_level']), - num_levels=int(cfgMorrisSA['MORRIS SA']['num_levels']) + conf_level=cfgMorrisSA.getfloat('MORRIS SA', 'conf_level'), + num_levels=cfgMorrisSA.getint('MORRIS SA', 'num_levels') ) - # Rank Parameters + # Creating dict from Si SiData = { "Parameter": Si['names'], "mu_star": Si['mu_star'], @@ -98,19 +98,20 @@ def runMorrisSA(): # Create df with parameters and the loss function for summary statistics paramLossDF, paramLossScaledDF = optimisationUtils.createDFParameterLoss(morrisDF, SiDFSort['Parameter']) + # topN is the number of best morris samples to use for statistics # Raise error if topN is bigger than available model runs - N = int(cfgMorrisSA['MORRIS SA']['N']) - if N > nAvailable: + topN = cfgMorrisSA.getint('MORRIS SA', 'topN') + if topN > nAvailable: message = ( - f"Invalid configuration: Requested top {N} simulations, but only {nAvailable} Morris simulations are " + f"Invalid configuration: Requested top {topN} simulations, but only {nAvailable} Morris simulations are " "available." 
) raise ValueError(message) - paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:N] + paramLossSubsetDF = paramLossDF.sort_values(by='Loss', ascending=True)[:topN] # Save mean values of best input parameters as csv date = datetime.now().strftime("%Y%m%d") - csvPath = f"{outDir}/{avaName}_MorrisBEST{N}Simulations_{date}.csv" + csvPath = f"{outDir}/{avaName}_MorrisBEST{topN}Simulations_{date}.csv" paramLossSubsetDF.describe().to_csv(csvPath) diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index d13a8667e..47bb17d78 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -118,10 +118,10 @@ def runOptimisation(): # Save sim with currently best y saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) - eiThreshold = float(cfgOpt['OPTIMISATION']['eiThreshold']) - nGoodSims = float(cfgOpt['OPTIMISATION']['numberOfGoodSimulations']) + eiThreshold = cfgOpt.getfloat('OPTIMISATION', 'eiThreshold') + nGoodSims = cfgOpt.getfloat('OPTIMISATION', 'numberOfGoodSimulations') countGoodSims = 0 - bo_max_iterations = int(cfgOpt['OPTIMISATION']['bo_max_iterations']) + bo_max_iterations = cfgOpt.getint('OPTIMISATION', 'bo_max_iterations') # Before the loop: start counter based on existing BO ("seq") simulations # Important to avoid same order number across different BO runs @@ -176,7 +176,7 @@ def runOptimisation(): title=f"{avaName}, Seq-Analysis: Best Model Runs") # Save BO plots - n_top_samples = int(cfgOpt['OPTIMISATION']['n_model_top']) + n_top_samples = cfgOpt.getint('OPTIMISATION', 'n_model_top') emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) saveResults.BOConvergencePlot(finalDF, avaName, outDir, cfgOpt) diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index 334770938..f48647880 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ 
b/avaframe/out3Plot/outAna6Plots.py @@ -162,7 +162,7 @@ def scatterplotUncertaintySA(SiDF, avaName, outDir): def BOConvergencePlot(finalDF, avaName, outDir, cfgOpt): """ Visualises the evolution of the optimisation variable (loss) across sampling phases: - 1. Morris (scenario 1) OR Latin hypercube (scenario 2) + 1. Latin hypercube (scenario 1) OR Morris (scenario 2) 2. Bayesian optimisation (EI/LCB) -> sampleMethods == "seq" 3. Non-sequential surrogate-based sampling -> sampleMethods == "nonseq" @@ -181,7 +181,7 @@ def BOConvergencePlot(finalDF, avaName, outDir, cfgOpt): Configuration object containing the section 'PARAM_BOUNDS'. """ # Read scenario flag - scenario = int(cfgOpt["PARAM_BOUNDS"]["scenario"]) + scenario = cfgOpt.getint("PARAM_BOUNDS", "scenario") # Color palette c_best = "#4e79a7" # Blue @@ -194,13 +194,13 @@ def BOConvergencePlot(finalDF, avaName, outDir, cfgOpt): # Decide which method represents the initial sampling phase if scenario == 1: - initial_method = "morris" - initial_label = "Morris" - initial_color = c_morris - elif scenario == 2: initial_method = "latin" initial_label = "LatinHypercube" initial_color = c_lhs + elif scenario == 2: + initial_method = "morris" + initial_label = "Morris" + initial_color = c_morris else: message = ( f"Unknown scenario '{scenario}' in cfgOpt['PARAM_BOUNDS']['scenario']. 
" @@ -627,7 +627,7 @@ def _get_optvar_for_sim(sim_name, label): if simNameBest is not None: df_top.loc["optimisationVariable", "best"] = _get_optvar_for_sim(simNameBest, "simNameBest") - n_surrogate_top = int(cfgOpt["OPTIMISATION"]["n_surrogate_top"]) + n_surrogate_top = cfgOpt.getint("OPTIMISATION", "n_surrogate_top") mean_tag = f" ({simNameMean})" if simNameMean else "" best_tag = f" ({simNameBest})" if simNameBest else "" @@ -646,7 +646,7 @@ def _get_optvar_for_sim(sim_name, label): best_idx = finalDF["optimisationVariable"].idxmin() # Raise error if topN is bigger than available model runs - n_model_top = int(cfgOpt["OPTIMISATION"]["n_model_top"]) + n_model_top = cfgOpt.getint("OPTIMISATION", "n_model_top") nAvailable = len(finalDF.index) if n_model_top > nAvailable: message = ( From 39b8dbe8b7d690281f6ab2dd0c10fd916a217216 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 12 Mar 2026 14:07:37 +0100 Subject: [PATCH 19/20] Improve documentation, construct the filename of AIMEC results from AIMECcfg.ini settings and not pass AIMEC module for calcArealIndicatorsAndAIMEC --- .../ana6Optimisation/optimisationUtils.py | 75 ++++++++++++------- avaframe/ana6Optimisation/runMorrisSA.py | 3 +- avaframe/ana6Optimisation/runOptimisation.py | 29 +++---- avaframe/out3Plot/outAna6Plots.py | 2 +- 4 files changed, 68 insertions(+), 41 deletions(-) diff --git a/avaframe/ana6Optimisation/optimisationUtils.py b/avaframe/ana6Optimisation/optimisationUtils.py index fe0bb79cf..eecf6d540 100644 --- a/avaframe/ana6Optimisation/optimisationUtils.py +++ b/avaframe/ana6Optimisation/optimisationUtils.py @@ -18,6 +18,7 @@ from avaframe.in3Utils import cfgUtils from avaframe.in3Utils import fileHandlerUtils as fU from avaframe.com8MoTPSA import com8MoTPSA +from avaframe.ana3AIMEC import ana3AIMEC from avaframe.ana4Stats import probAna from avaframe.runScripts.runPlotAreaRefDiffs import runPlotAreaRefDiffs import avaframe.out3Plot.outAna6Plots as saveResults @@ -26,9 +27,10 @@ log = 
logging.getLogger("avaframe") -def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC): +def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir): """ - Calculate areal indicators between reference polygon and simulation and AIMEC analysis and save data in ana3AIMEC and out1Peak. + Calculate areal indicators between reference polygon and simulation and AIMEC analysis and save data in ana3AIMEC + and out1Peak. Parameters ---------- @@ -39,9 +41,7 @@ def calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC): - thresholdValueSimulation - modName avalancheDir : str - Directory containing the directory of the reference avalanche - ana3AIMEC : module - AIMEC analysis module providing `fullAimecAnalysis()`. + Path to the avalanche directory. """ # ToDo: to reduce comp. cost: run AIMEC and calcArealIndicators only for new simulations @@ -216,7 +216,11 @@ def buildFinalDF(avalancheDir, varParList, cfg): Build the final merged DataFrame for a given avalanche. Combines parameter sets, AIMEC results, and areal indicators into one DataFrame, - then computes evaluation metrics 'via addLossMetrics'. + then computes evaluation metrics 'via addLossMetrics'. The resulting DataFrame contains one row per simulation. + If simulations are available for two layers (e.g. L1 and L2), only the layer specified in the configuration files + is considered. Simulations from the selected layer are kept, while simulations from the other layer are excluded + from the final DataFrame. The analysis layer must be defined in the corresponding .ini configuration files. Further + details are provided in README_ana6.md. Parameters ---------- @@ -231,9 +235,11 @@ def buildFinalDF(avalancheDir, varParList, cfg): ------- finalDF : pandas.DataFrame Final DataFrame containing: - - ``simName`` - - ``parameterSet`` - - ``order`` + - `simName` + - `parameterSet` + - `order`: used later for visualisation. Morris, Latin hypercube and sequential samples are assigned an order + index. 
For each sampling method, the index starts at 0 and increases in increments of 1 up to the + total number of samples of that method. - Areal indicator columns - Evaluation metrics (recall, precision, f1_score, tversky_score, optimisationVariable) """ @@ -245,9 +251,16 @@ def buildFinalDF(avalancheDir, varParList, cfg): # Read parameterSetDF paramSetDF = readParamSetDF(inDir, varParList) - # Dataframe from AIMEC - df_aimec = pd.read_csv( - avalancheDir + '/Outputs/ana3AIMEC/com8MoTPSA/Results_' + avaName + '_ppr_lim_1_w_600resAnalysisDF.csv') + # Get Dataframe from AIMEC analysis + cfgAIMEC = cfgUtils.getModuleConfig(ana3AIMEC) + runoutResTypes = cfgAIMEC['AIMECSETUP']['resTypes'] + thresholdValue = cfgAIMEC['AIMECSETUP']['thresholdValue'] + domainWidth = cfgAIMEC['AIMECSETUP']['domainWidth'] + + AIMECPath = avalancheDir + '/Outputs/ana3AIMEC/com8MoTPSA/' + AIMECFileName = f"Results_{avaName}_{runoutResTypes}_lim_{thresholdValue}_w_{domainWidth}resAnalysisDF.csv" + + df_aimec = pd.read_csv(AIMECPath + AIMECFileName) # Get data of the reference in AIMEC referenceDF = pd.read_csv(f"{avalancheDir}/Outputs/ana3AIMEC/com8MoTPSA/referenceDF.csv") @@ -271,13 +284,17 @@ def buildFinalDF(avalancheDir, varParList, cfg): def createDFParameterLoss(df, paramSelected): """ Create DataFrames linking selected parameters with the loss function. + The meaning of selected depends on the chosen scenario. If Morris sensitivity analysis is not run beforehand, all + varied input parameters from probAnaCfg.ini are included. If Morris sensitivity analysis is run beforehand, only the topN + highest-ranked parameters are selected based on the Morris results. Parameters ---------- df : pandas.DataFrame - DataFrame that contains the selected parameters and their values as well as the loss function. + DataFrame that contains the finalDF including the input parameters and their values as well as the loss function. 
paramSelected : list of str - Subset of parameters to include in the output DataFrames. + List of parameters to include in the output DataFrames. This determines which parameters will be used for + optimisation and depends on the scenario. Returns ------- @@ -769,32 +786,40 @@ def findSimName(finalDF, paramValue, atol=1e-6): return matches.iloc[0] -def loadVariationData(cfgOpt, outDir, avaDir): +def loadVariationData(cfgOpt, avaDir, outDir=None): """ - Load parameter bounds and selected parameters for optimisation. Two execution modes are supported, controlled via - cfgOpt['PARAM_BOUNDS']['scenario']: + Load parameter bounds and selected parameters for optimisation. + + The used parameters and their bounds are not defined directly in this + function. Instead, they are obtained from results of previous simulation runs. + + Two execution modes are supported and controlled via cfgOpt['PARAM_BOUNDS']['scenario']: Scenario 1 (Manual definition): - No prior Morris screening. - - Parameter names and corresponding bounds are loaded from a previously saved pickle file generated by - ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is therefore not defined within this function, but is - determined by the configuration specified in ``probAnaCfg.ini``. The ``probAnaCfg.ini`` file contains the - settings used to generate the initial sample set, including parameter ranges and variation rules. + - Parameter names and corresponding bounds are loaded from a previously saved pickle file 'paramValuesD.pickle' + generated by ``runAna4ProbAnaCom8MoTPSA.py``. The parameter variation is therefore not defined within this + function, but is determined by the configuration specified in ``probAnaCfg.ini``. The ``probAnaCfg.ini`` file + contains the settings used to generate the initial sample set, including parameter ranges and variation rules. 
+ + This is the standard scenario, as Latin Hypercube Sampling provides good coverage of the parameter space, which is + important for training the surrogate model. Scenario 2 (Morris pre-run): - A Morris sensitivity analysis has already been executed. - - Ranked parameters and their bounds are loaded from - 'sa_parameters_bounds.pkl'. + - Ranked parameters and their bounds are loaded from 'sa_parameters_bounds.pkl' and morris samples are directly + used for optimisation. - The top-N most influential parameters are selected for optimisation. + This option is mainly intended for experimental use. Morris samples are designed for sensitivity analysis and + parameter ranking, but they do not provide an optimal coverage of the parameter space for surrogate-based optimisation. Parameters ---------- cfgOpt: configparser.ConfigParser Configuration object containing the section 'PARAM_BOUNDS'. outDir: pathlib.Path - Directory containing the Morris output file - ('sa_parameters_bounds.pkl'). + Directory containing the Morris output file ('sa_parameters_bounds.pkl'). avaDir: str Directory of the avalanche. 
diff --git a/avaframe/ana6Optimisation/runMorrisSA.py b/avaframe/ana6Optimisation/runMorrisSA.py index 6319064fb..2a9ff42b5 100644 --- a/avaframe/ana6Optimisation/runMorrisSA.py +++ b/avaframe/ana6Optimisation/runMorrisSA.py @@ -7,7 +7,6 @@ from avaframe.in3Utils import cfgUtils from avaframe.in3Utils import fileHandlerUtils as fU -from avaframe.ana3AIMEC import ana3AIMEC import avaframe.out3Plot.outAna6Plots as saveResults import optimisationUtils @@ -26,7 +25,7 @@ def runMorrisSA(): avaName = pathlib.Path(avalancheDir).name # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak - optimisationUtils.calcArealIndicatorsAndAimec(cfgMorrisSA, avalancheDir, ana3AIMEC) + optimisationUtils.calcArealIndicatorsAndAimec(cfgMorrisSA, avalancheDir) # Load variation data with bounds from pickle file inDir = pathlib.Path(avalancheDir, 'Outputs', "ana4Stats") diff --git a/avaframe/ana6Optimisation/runOptimisation.py b/avaframe/ana6Optimisation/runOptimisation.py index 47bb17d78..67025d5ea 100644 --- a/avaframe/ana6Optimisation/runOptimisation.py +++ b/avaframe/ana6Optimisation/runOptimisation.py @@ -6,7 +6,6 @@ from avaframe.in3Utils import cfgUtils, logUtils from avaframe.in3Utils import fileHandlerUtils as fU from avaframe.in3Utils import initializeProject as initProj -from avaframe.ana3AIMEC import ana3AIMEC import avaframe.out3Plot.outAna6Plots as saveResults from avaframe.com8MoTPSA import com8MoTPSA import optimisationUtils @@ -32,7 +31,7 @@ def runOptimisation(): fU.makeADir(outDir) # Get config from morris for path to morris results - cfgDir = 'runMorrisSA.ini' + cfgDir = 'runMorrisSA.py' cfgMorrisSA = cfgUtils.getModuleConfig(pathlib.Path(cfgDir), toPrint=False) resultDirMorris = cfgMorrisSA['GENERAL']['resultDir'] inDir = pathlib.Path(avalancheDir, resultDirMorris, comModuleName) @@ -44,10 +43,10 @@ def runOptimisation(): log.info('Current avalanche: %s', avalancheDir) # Load variation parameters and their bounds - 
paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, inDir, avalancheDir) + paramBounds, paramSelected = optimisationUtils.loadVariationData(cfgOpt, avalancheDir, inDir) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak - optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir) # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) @@ -59,12 +58,16 @@ def runOptimisation(): log.error(message) raise RuntimeError(message) - # ---------------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------------------------------------------------ + # Two surrogate-based optimisation strategies are available: + # nonseq: Non-sequential optimisation uses a fixed set of simulations to train the surrogate and evaluate the loss. + # seq: Sequential (Bayesian) optimisation uses an initial set of samples to train the surrogate and iteratively + # adds new simulations using the Expected Improvement acquisition function. 
optimisationType = cfgOpt['OPTIMISATION']['optType'].lower() if optimisationType == 'nonseq': csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_NonSeq.csv" # Save sim with currently best y - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) + saveResults.saveBestOrSpecificSimulation(finalDF, paramSelected, csv_path=csv_path) # Create df with most important parameters and the loss function emulatorDF, emulatorScaledDF = optimisationUtils.createDFParameterLoss(finalDF, paramSelected) @@ -95,7 +98,7 @@ def runOptimisation(): com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgPath) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak - optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir) # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) @@ -110,13 +113,13 @@ def runOptimisation(): simNameMean=simNameMean, simNameBest=simNameBest) # Save latest real sim - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, simName=simNameMean, csv_path=csv_path) + saveResults.saveBestOrSpecificSimulation(finalDF, paramSelected, simName=simNameMean, csv_path=csv_path) - # ---------------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------------------------------------------------ elif optimisationType == 'seq': csv_path = outDir / f"{avaName}_BestOrCurrentSimulation_BO.csv" # Save sim with currently best y - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, csv_path=csv_path) + saveResults.saveBestOrSpecificSimulation(finalDF, paramSelected, csv_path=csv_path) eiThreshold = cfgOpt.getfloat('OPTIMISATION', 'eiThreshold') nGoodSims = 
cfgOpt.getfloat('OPTIMISATION', 'numberOfGoodSimulations') @@ -155,15 +158,15 @@ def runOptimisation(): com8MoTPSA.com8MoTPSAMain(cfgMain, cfgInfo=cfgPath) # Calculate Areal indicators and AIMEC and save the results in Outputs/ana3AIMEC and Outputs/out1Peak - optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir, ana3AIMEC) + optimisationUtils.calcArealIndicatorsAndAimec(cfgOpt, avalancheDir) # Read and merge results from parameter sets (simulation data), areal indicators and AIMEC finalDF = optimisationUtils.buildFinalDF(avalancheDir, paramSelected, cfgOpt) # Save latest sim simName = optimisationUtils.findSimName(finalDF, xBestDict, atol=1e-6) - saveResults.saveBestorCurrentModelrun(finalDF, paramSelected, ei, lcb, simName, - csv_path=csv_path) + saveResults.saveBestOrSpecificSimulation(finalDF, paramSelected, ei, lcb, simName, + csv_path=csv_path) # If ei is smaller than threshold, the simulation is counted as 'good', if number of good simulations is # reached, optimization stops if ei < eiThreshold: diff --git a/avaframe/out3Plot/outAna6Plots.py b/avaframe/out3Plot/outAna6Plots.py index f48647880..5b03238a8 100644 --- a/avaframe/out3Plot/outAna6Plots.py +++ b/avaframe/out3Plot/outAna6Plots.py @@ -447,7 +447,7 @@ def saveKFoldCVPrintImage(scores, pipeName, k, out_path): plt.close(fig) -def saveBestorCurrentModelrun(finalDF, paramSelected, ei=None, lcb=None, simName=None, csv_path='dummy.csv'): +def saveBestOrSpecificSimulation(finalDF, paramSelected, ei=None, lcb=None, simName=None, csv_path='dummy.csv'): """ Save either the best model run (based on optimisationVariable) or a specified simulation (simName) to a CSV file. 
From 408575503d0b5b23bed623d9100966fb508cdbe4 Mon Sep 17 00:00:00 2001 From: RolandFischbacher Date: Thu, 12 Mar 2026 17:55:41 +0100 Subject: [PATCH 20/20] Add definition of chunkSize to probAnaCfg.ini, if left empty 10 is used as default --- avaframe/ana4Stats/probAnaCfg.ini | 4 ++++ avaframe/com8MoTPSA/com8MoTPSA.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/avaframe/ana4Stats/probAnaCfg.ini b/avaframe/ana4Stats/probAnaCfg.ini index bdddd6e8a..8fc86af48 100644 --- a/avaframe/ana4Stats/probAnaCfg.ini +++ b/avaframe/ana4Stats/probAnaCfg.ini @@ -14,6 +14,10 @@ unit = kPa peakLim = 1.0 # if only probability analysis is performed check for modName to locate peakFiles avaDir/Outputs/modName/peakFiles modName = com1DFA +# If modName = com8MoTPSA, simulations can be executed in chunks. +# chunkSize defines how many simulations are processed per chunk (running simulations, postprocessing and cleaning the working directory incrementally). +# If left empty, the default chunk size used in the script is 10. 
+chunkSize = [PROBRUN] diff --git a/avaframe/com8MoTPSA/com8MoTPSA.py b/avaframe/com8MoTPSA/com8MoTPSA.py index f1d7a48ea..a14716e42 100644 --- a/avaframe/com8MoTPSA/com8MoTPSA.py +++ b/avaframe/com8MoTPSA/com8MoTPSA.py @@ -14,6 +14,7 @@ import avaframe.com1DFA.com1DFA as com1DFA from avaframe.in3Utils import cfgUtils from avaframe.in2Trans import rasterUtils as rU +from avaframe.ana4Stats import probAna from avaframe.in1Data import getInput as gI import avaframe.in3Utils.fileHandlerUtils as fU from avaframe.out1Peak import outPlotAllPeak as oP @@ -61,8 +62,15 @@ def com8MoTPSAMain(cfgMain, cfgInfo=None, returnSimName=None): log.info("--- STARTING (potential) PARALLEL PART ----") - # Split into chunks to postprocess and clean up work dirs incrementally - chunkSize = cfgUtils.getNumberOfProcesses(cfgMain, len(rcfFiles)) + # Split into chunks to postprocess and clean up working directory incrementally + # Get chunkSize from probAnaCfg.ini, if it is empty use 10 as default + cfgProbAna = cfgUtils.getModuleConfig(probAna) + chunkSize = cfgProbAna.get('GENERAL', 'chunkSize', fallback='') + if chunkSize == '': + chunkSize = 10 + else: + chunkSize = int(chunkSize) + rcfChunks = [rcfFiles[i:i + chunkSize] for i in range(0, len(rcfFiles), chunkSize)] for rcfFilesChunk in rcfChunks: