From faf3cc277aaa02e86242e2843e1aa66448cc79f8 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 30 Jan 2026 15:25:18 +0100 Subject: [PATCH 1/4] model --- src/eventdisplay_ml/hyper_parameters.py | 2 +- src/eventdisplay_ml/models.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/eventdisplay_ml/hyper_parameters.py b/src/eventdisplay_ml/hyper_parameters.py index f8568fb..6776202 100644 --- a/src/eventdisplay_ml/hyper_parameters.py +++ b/src/eventdisplay_ml/hyper_parameters.py @@ -30,7 +30,7 @@ "hyper_parameters": { "objective": "binary:logistic", "eval_metric": "logloss", # TODO AUC ? - "n_estimators": 100, # TODO probably too low + "n_estimators": 2000, "max_depth": 6, "learning_rate": 0.1, "subsample": 0.8, diff --git a/src/eventdisplay_ml/models.py b/src/eventdisplay_ml/models.py index c01de4c..82a0cfa 100644 --- a/src/eventdisplay_ml/models.py +++ b/src/eventdisplay_ml/models.py @@ -586,7 +586,9 @@ def train_classification(df, model_configs): for name, cfg in model_configs.get("models", {}).items(): _logger.info(f"Training {name}") model = xgb.XGBClassifier(**cfg.get("hyper_parameters", {})) - model.fit(x_train, y_train) + model.fit( + x_train, y_train, early_stopping_rounds=10, eval_set=[(x_test, y_test)], verbose=False + ) evaluate_classification_model(model, x_test, y_test, full_df, x_data.columns.tolist(), name) cfg["model"] = model cfg["efficiency"] = evaluation_efficiency(name, model, x_test, y_test) @@ -632,7 +634,7 @@ def _log_energy_bin_counts(df): _logger.info(f"Energy bin weights (inverse-count, normalized): {inverse_counts}") - # Calculate multiplicity weights (inverse frequency) + # Calculate multiplicity weights (prioritize higher-multiplicity events) mult_counts = df["DispNImages"].value_counts() _logger.info("Training events per multiplicity:") for mult, count in mult_counts.items(): From fe81c3adb04913935d6407b5810bb140d4787184 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 30 Jan 2026 15:56:40 +0100 
Subject: [PATCH 2/4] early stopping --- docs/changes/48.maintenance.md | 1 + src/eventdisplay_ml/hyper_parameters.py | 1 + src/eventdisplay_ml/models.py | 5 ++--- 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 docs/changes/48.maintenance.md diff --git a/docs/changes/48.maintenance.md b/docs/changes/48.maintenance.md new file mode 100644 index 0000000..fe5cf67 --- /dev/null +++ b/docs/changes/48.maintenance.md @@ -0,0 +1 @@ +Add early stopping to classification. Increase number of estimators. diff --git a/src/eventdisplay_ml/hyper_parameters.py b/src/eventdisplay_ml/hyper_parameters.py index 6776202..6f1c733 100644 --- a/src/eventdisplay_ml/hyper_parameters.py +++ b/src/eventdisplay_ml/hyper_parameters.py @@ -31,6 +31,7 @@ "objective": "binary:logistic", "eval_metric": "logloss", # TODO AUC ? "n_estimators": 2000, + "early_stopping_rounds": 50, "max_depth": 6, "learning_rate": 0.1, "subsample": 0.8, diff --git a/src/eventdisplay_ml/models.py b/src/eventdisplay_ml/models.py index 0897c5a..cc244c0 100644 --- a/src/eventdisplay_ml/models.py +++ b/src/eventdisplay_ml/models.py @@ -572,6 +572,7 @@ def train_classification(df, model_configs): _logger.info(f"Features ({len(x_data.columns)}): {', '.join(x_data.columns)}") model_configs["features"] = list(x_data.columns) y_data = full_df["label"] + x_train, x_test, y_train, y_test = train_test_split( x_data, y_data, @@ -585,9 +586,7 @@ def train_classification(df, model_configs): for name, cfg in model_configs.get("models", {}).items(): _logger.info(f"Training {name}") model = xgb.XGBClassifier(**cfg.get("hyper_parameters", {})) - model.fit( - x_train, y_train, early_stopping_rounds=10, eval_set=[(x_test, y_test)], verbose=False - ) + model.fit(x_train, y_train, eval_set=[(x_test, y_test)], verbose=True) evaluate_classification_model(model, x_test, y_test, full_df, x_data.columns.tolist(), name) cfg["model"] = model cfg["efficiency"] = evaluation_efficiency(name, model, x_test, y_test) From 
56f5870e3bbfc917bcc145ec7e96d19dcc7b3747 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 30 Jan 2026 16:35:58 +0100 Subject: [PATCH 3/4] eval metric --- src/eventdisplay_ml/hyper_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eventdisplay_ml/hyper_parameters.py b/src/eventdisplay_ml/hyper_parameters.py index 6f1c733..e0a5943 100644 --- a/src/eventdisplay_ml/hyper_parameters.py +++ b/src/eventdisplay_ml/hyper_parameters.py @@ -29,7 +29,7 @@ "model": None, "hyper_parameters": { "objective": "binary:logistic", - "eval_metric": "logloss", # TODO AUC ? + "eval_metric": ["logloss", "auc"], "n_estimators": 2000, "early_stopping_rounds": 50, "max_depth": 6, From 3e7816b784853e054ba63e916e75cf27ea9399fc Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 31 Jan 2026 10:53:38 +0100 Subject: [PATCH 4/4] hyperparameter adaptation for g/h --- src/eventdisplay_ml/hyper_parameters.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/eventdisplay_ml/hyper_parameters.py b/src/eventdisplay_ml/hyper_parameters.py index e0a5943..4cfe4dc 100644 --- a/src/eventdisplay_ml/hyper_parameters.py +++ b/src/eventdisplay_ml/hyper_parameters.py @@ -30,14 +30,14 @@ "hyper_parameters": { "objective": "binary:logistic", "eval_metric": ["logloss", "auc"], - "n_estimators": 2000, + "n_estimators": 5000, "early_stopping_rounds": 50, - "max_depth": 6, - "learning_rate": 0.1, + "max_depth": 7, + "learning_rate": 0.05, "subsample": 0.8, "colsample_bytree": 0.8, "random_state": None, - "n_jobs": 8, + "n_jobs": 48, }, } }