diff --git a/docs/changes/45.maintenance.md b/docs/changes/45.maintenance.md new file mode 100644 index 0000000..113a390 --- /dev/null +++ b/docs/changes/45.maintenance.md @@ -0,0 +1 @@ +Update g/h separation to new sorting scheme of telescope-dependent variables. diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index f2246d8..85763d5 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -460,7 +460,7 @@ def flatten_telescope_data_vectorized( flat_features[f"{var}_{tel_idx}"] = data_normalized[:, tel_idx] index = _get_index(df, n_evt) - df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config) + df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config, analysis_type) return pd.concat( [df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)], axis=1, @@ -814,7 +814,7 @@ def apply_clip_intervals(df, n_tel=None, apply_log10=None): df.loc[mask_to_log, var_base] = np.log10(df.loc[mask_to_log, var_base]) -def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): +def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, analysis_type=None): """Generate dataframe for telescope variables flattened for all telescopes. Creates features for all telescope IDs, using NaN as default value for missing data. @@ -829,6 +829,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): DataFrame index. tel_config : dict, optional Telescope configuration with 'max_tel_id' key. + analysis_type : str, optional + Type of analysis, e.g. "classification" or "stereo_analysis". """ df_flat = pd.DataFrame(flat_features, index=index) df_flat = df_flat.astype(np.float32) @@ -836,6 +838,10 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): # Determine max telescope ID from config or use n_tel max_tel_id = tel_config["max_tel_id"] if tel_config else (n_tel - 1) + keep_size_vars = analysis_type == "stereo_analysis" + if not keep_size_vars: + _logger.info(f"Dropping 'size'-related variables for {analysis_type} analysis.") + new_cols = {} for i in range(max_tel_id + 1): # Iterate over all possible telescopes if f"Disp_T_{i}" in df_flat: @@ -844,7 +850,7 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): if f"loss_{i}" in df_flat and f"dist_{i}" in df_flat: new_cols[f"loss_loss_{i}"] = df_flat[f"loss_{i}"] ** 2 new_cols[f"loss_dist_{i}"] = df_flat[f"loss_{i}"] * df_flat[f"dist_{i}"] - if f"size_{i}" in df_flat and f"dist_{i}" in df_flat: + if f"size_{i}" in df_flat and f"dist_{i}" in df_flat and keep_size_vars: new_cols[f"size_dist2_{i}"] = df_flat[f"size_{i}"] / (df_flat[f"dist_{i}"] ** 2 + 1e-6) if f"width_{i}" in df_flat and f"length_{i}" in df_flat: new_cols[f"width_length_{i}"] = df_flat[f"width_{i}"] / (df_flat[f"length_{i}"] + 1e-6) @@ -873,6 +879,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None): if f"cen_y_{i}" in df_flat and f"fpointing_dy_{i}" in df_flat: df_flat[f"cen_y_{i}"] = df_flat[f"cen_y_{i}"] + df_flat[f"fpointing_dy_{i}"] df_flat = df_flat.drop(columns=[f"fpointing_dx_{i}", f"fpointing_dy_{i}"], errors="ignore") + if not keep_size_vars: + df_flat = df_flat.drop(columns=[f"size_{i}"], errors="ignore") return df_flat diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index 0839dfc..cff43a6 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -74,6 +74,7 @@ def telescope_features(analysis_type): List of telescope-level feature names. """ var = [ + "size", "cosphi", "sinphi", "loss", @@ -95,7 +96,6 @@ def telescope_features(analysis_type): return [ *var, - "size", "cen_x", "cen_y", "E", @@ -147,9 +147,12 @@ def _classification_features(): "MSCL", "ArrayPointing_Elevation", "ArrayPointing_Azimuth", + "Xcore", + "Ycore", ] # energy used to bin the models, but not as feature - return var_tel + var_array + ["Erec"] + # size used for sorting events during flattening, but not as feature + return var_tel + var_array + ["Erec", "size"] def clip_intervals(): diff --git a/src/eventdisplay_ml/geomag.py b/src/eventdisplay_ml/geomag.py index f5e9034..559921d 100644 --- a/src/eventdisplay_ml/geomag.py +++ b/src/eventdisplay_ml/geomag.py @@ -19,7 +19,7 @@ "CTAO-SOUTH": { "BX": 20.552e-6, # Tesla "BY": 0.0, # Tesla - "BZ": -9.367 - 6, # Tesla + "BZ": -9.367e-6, # Tesla }, } diff --git a/src/eventdisplay_ml/models.py b/src/eventdisplay_ml/models.py index c01de4c..68c4c10 100644 --- a/src/eventdisplay_ml/models.py +++ b/src/eventdisplay_ml/models.py @@ -360,8 +360,7 @@ def process_file_chunked(analysis_type, model_configs): threshold_keys = sorted( { eff - for n_tel_models in model_configs["models"].values() - for e_bin_models in n_tel_models.values() + for e_bin_models in model_configs["models"].values() for eff in (e_bin_models.get("thresholds") or {}).keys() } ) diff --git a/src/eventdisplay_ml/scripts/train_xgb_classify.py b/src/eventdisplay_ml/scripts/train_xgb_classify.py index a500d51..a1abb2f 100644 --- a/src/eventdisplay_ml/scripts/train_xgb_classify.py +++ b/src/eventdisplay_ml/scripts/train_xgb_classify.py @@ -4,7 +4,7 @@ Uses image and stereo parameters to train classification BDTs to separate gamma-ray events from hadronic background events. -Separate BDTs are trained for 2, 3, and 4 telescope multiplicity events. +Trains a single classifier on all telescope multiplicity events. """ import logging