From 29e56b730b0a1a4342978e399dad6c09c7c16af7 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 28 Jan 2026 21:24:07 +0100 Subject: [PATCH 1/4] Sort by size, but drop later --- src/eventdisplay_ml/data_processing.py | 14 +++++++++++--- src/eventdisplay_ml/features.py | 6 +++++- src/eventdisplay_ml/scripts/train_xgb_classify.py | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index 3a49b43..2dd9782 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -460,7 +460,7 @@ def flatten_telescope_data_vectorized( flat_features[f"{var}_{tel_idx}"] = data_normalized[:, tel_idx] index = _get_index(df, n_evt) - df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config) + df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config, analysis_type) return pd.concat( [df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)], axis=1, @@ -808,7 +808,7 @@ def apply_clip_intervals(df, n_tel=None, apply_log10=None): df.loc[mask_to_log, var_base] = np.log10(df.loc[mask_to_log, var_base]) -def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): +def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, analysis_type=None): """Generate dataframe for telescope variables flattened for all telescopes. Creates features for all telescope IDs, using NaN as default value for missing data. @@ -823,6 +823,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): DataFrame index. tel_config : dict, optional Telescope configuration with 'max_tel_id' key. + analysis_type : str, optional + Type of analysis, e.g. "classification" or "stereo_analysis". 
""" df_flat = pd.DataFrame(flat_features, index=index) df_flat = df_flat.astype(np.float32) @@ -830,6 +832,10 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): # Determine max telescope ID from config or use n_tel max_tel_id = tel_config["max_tel_id"] if tel_config else (n_tel - 1) + keep_size_vars = analysis_type == "stereo_analysis" + if not keep_size_vars: + _logger.info(f"Dropping 'size'-related variables for {analysis_type} analysis.") + new_cols = {} for i in range(max_tel_id + 1): # Iterate over all possible telescopes if f"Disp_T_{i}" in df_flat: @@ -838,7 +844,7 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): if f"loss_{i}" in df_flat and f"dist_{i}" in df_flat: new_cols[f"loss_loss_{i}"] = df_flat[f"loss_{i}"] ** 2 new_cols[f"loss_dist_{i}"] = df_flat[f"loss_{i}"] * df_flat[f"dist_{i}"] - if f"size_{i}" in df_flat and f"dist_{i}" in df_flat: + if f"size_{i}" in df_flat and f"dist_{i}" in df_flat and keep_size_vars: new_cols[f"size_dist2_{i}"] = df_flat[f"size_{i}"] / (df_flat[f"dist_{i}"] ** 2 + 1e-6) if f"width_{i}" in df_flat and f"length_{i}" in df_flat: new_cols[f"width_length_{i}"] = df_flat[f"width_{i}"] / (df_flat[f"length_{i}"] + 1e-6) @@ -867,6 +873,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): if f"cen_y_{i}" in df_flat and f"fpointing_dy_{i}" in df_flat: df_flat[f"cen_y_{i}"] = df_flat[f"cen_y_{i}"] + df_flat[f"fpointing_dy_{i}"] df_flat = df_flat.drop(columns=[f"fpointing_dx_{i}", f"fpointing_dy_{i}"], errors="ignore") + if not keep_size_vars: + df_flat = df_flat.drop(columns=[f"size_{i}"], errors="ignore") return df_flat diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index 0839dfc..2e9b3a5 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -74,6 +74,7 @@ def telescope_features(analysis_type): List of telescope-level feature names. 
""" var = [ + "size", "cosphi", "sinphi", "loss", @@ -147,9 +148,12 @@ def _classification_features(): "MSCL", "ArrayPointing_Elevation", "ArrayPointing_Azimuth", + "Xcore", + "Ycore", ] # energy used to bin the models, but not as feature - return var_tel + var_array + ["Erec"] + # size used for sorting events during flattening, but not as feature + return var_tel + var_array + ["Erec", "size"] def clip_intervals(): diff --git a/src/eventdisplay_ml/scripts/train_xgb_classify.py b/src/eventdisplay_ml/scripts/train_xgb_classify.py index a500d51..617ba40 100644 --- a/src/eventdisplay_ml/scripts/train_xgb_classify.py +++ b/src/eventdisplay_ml/scripts/train_xgb_classify.py @@ -4,7 +4,7 @@ Uses image and stereo parameters to train classification BDTs to separate gamma-ray events from hadronic background events. -Separate BDTs are trained for 2, 3, and 4 telescope multiplicity events. +Trains a single BDT on all telescope multiplicity events. """ import logging From 4acb50e0c9e58428fd6b36a7f2988f7c5fd9ad56 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 28 Jan 2026 21:31:28 +0100 Subject: [PATCH 2/4] remove ntel dependency --- src/eventdisplay_ml/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/eventdisplay_ml/models.py b/src/eventdisplay_ml/models.py index c01de4c..68c4c10 100644 --- a/src/eventdisplay_ml/models.py +++ b/src/eventdisplay_ml/models.py @@ -360,8 +360,7 @@ def process_file_chunked(analysis_type, model_configs): threshold_keys = sorted( { eff - for n_tel_models in model_configs["models"].values() - for e_bin_models in n_tel_models.values() + for e_bin_models in model_configs["models"].values() for eff in (e_bin_models.get("thresholds") or {}).keys() } ) From cfa1a931198de07cdec7758cc16ea402c71bbd53 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 30 Jan 2026 15:35:01 +0100 Subject: [PATCH 3/4] changelog --- docs/changes/45.maintenance.md | 1 + src/eventdisplay_ml/features.py | 1 - 
src/eventdisplay_ml/scripts/train_xgb_classify.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 docs/changes/45.maintenance.md diff --git a/docs/changes/45.maintenance.md b/docs/changes/45.maintenance.md new file mode 100644 index 0000000..113a390 --- /dev/null +++ b/docs/changes/45.maintenance.md @@ -0,0 +1 @@ +Update g/h separation to new sorting scheme of telescope-dependent variables. diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index 2e9b3a5..cff43a6 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -96,7 +96,6 @@ def telescope_features(analysis_type): return [ *var, - "size", "cen_x", "cen_y", "E", diff --git a/src/eventdisplay_ml/scripts/train_xgb_classify.py b/src/eventdisplay_ml/scripts/train_xgb_classify.py index 617ba40..a1abb2f 100644 --- a/src/eventdisplay_ml/scripts/train_xgb_classify.py +++ b/src/eventdisplay_ml/scripts/train_xgb_classify.py @@ -4,7 +4,7 @@ Uses image and stereo parameters to train classification BDTs to separate gamma-ray events from hadronic background events. -Trains a single BDT on all telescope multiplicity events. +Trains a single classifier on all telescope multiplicity events. """ import logging From 731c50b9cb0aa26f6a5258d460cca7be6f991fe5 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Fri, 30 Jan 2026 15:35:46 +0100 Subject: [PATCH 4/4] typo --- src/eventdisplay_ml/geomag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eventdisplay_ml/geomag.py b/src/eventdisplay_ml/geomag.py index f5e9034..559921d 100644 --- a/src/eventdisplay_ml/geomag.py +++ b/src/eventdisplay_ml/geomag.py @@ -19,7 +19,7 @@ "CTAO-SOUTH": { "BX": 20.552e-6, # Tesla "BY": 0.0, # Tesla - "BZ": -9.367 - 6, # Tesla + "BZ": -9.367e-6, # Tesla }, }