Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/45.maintenance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update gamma/hadron separation to use the new sorting scheme for telescope-dependent variables.
14 changes: 11 additions & 3 deletions src/eventdisplay_ml/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def flatten_telescope_data_vectorized(
flat_features[f"{var}_{tel_idx}"] = data_normalized[:, tel_idx]

index = _get_index(df, n_evt)
df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config)
df_flat = flatten_telescope_variables(n_tel, flat_features, index, tel_config, analysis_type)
return pd.concat(
[df_flat, extra_columns(df, analysis_type, training, index, tel_config, observatory)],
axis=1,
Expand Down Expand Up @@ -814,7 +814,7 @@ def apply_clip_intervals(df, n_tel=None, apply_log10=None):
df.loc[mask_to_log, var_base] = np.log10(df.loc[mask_to_log, var_base])


def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None):
def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None, analysis_type=None):
"""Generate dataframe for telescope variables flattened for all telescopes.

Creates features for all telescope IDs, using NaN as default value for missing data.
Expand All @@ -829,13 +829,19 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None):
DataFrame index.
tel_config : dict, optional
Telescope configuration with 'max_tel_id' key.
analysis_type : str, optional
Type of analysis, e.g. "classification" or "stereo_analysis".
"""
df_flat = pd.DataFrame(flat_features, index=index)
df_flat = df_flat.astype(np.float32)

# Determine max telescope ID from config or use n_tel
max_tel_id = tel_config["max_tel_id"] if tel_config else (n_tel - 1)

keep_size_vars = analysis_type == "stereo_analysis"
if not keep_size_vars:
_logger.info(f"Dropping 'size'-related variables for {analysis_type} analysis.")

new_cols = {}
for i in range(max_tel_id + 1): # Iterate over all possible telescopes
if f"Disp_T_{i}" in df_flat:
Expand All @@ -844,7 +850,7 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None):
if f"loss_{i}" in df_flat and f"dist_{i}" in df_flat:
new_cols[f"loss_loss_{i}"] = df_flat[f"loss_{i}"] ** 2
new_cols[f"loss_dist_{i}"] = df_flat[f"loss_{i}"] * df_flat[f"dist_{i}"]
if f"size_{i}" in df_flat and f"dist_{i}" in df_flat:
if f"size_{i}" in df_flat and f"dist_{i}" in df_flat and keep_size_vars:
new_cols[f"size_dist2_{i}"] = df_flat[f"size_{i}"] / (df_flat[f"dist_{i}"] ** 2 + 1e-6)
if f"width_{i}" in df_flat and f"length_{i}" in df_flat:
new_cols[f"width_length_{i}"] = df_flat[f"width_{i}"] / (df_flat[f"length_{i}"] + 1e-6)
Expand Down Expand Up @@ -873,6 +879,8 @@ def flatten_telescope_variables(n_tel, flat_features, index, tel_config=None):
if f"cen_y_{i}" in df_flat and f"fpointing_dy_{i}" in df_flat:
df_flat[f"cen_y_{i}"] = df_flat[f"cen_y_{i}"] + df_flat[f"fpointing_dy_{i}"]
df_flat = df_flat.drop(columns=[f"fpointing_dx_{i}", f"fpointing_dy_{i}"], errors="ignore")
if not keep_size_vars:
df_flat = df_flat.drop(columns=[f"size_{i}"], errors="ignore")

return df_flat

Expand Down
7 changes: 5 additions & 2 deletions src/eventdisplay_ml/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def telescope_features(analysis_type):
List of telescope-level feature names.
"""
var = [
"size",
"cosphi",
"sinphi",
"loss",
Expand All @@ -95,7 +96,6 @@ def telescope_features(analysis_type):

return [
*var,
"size",
"cen_x",
"cen_y",
"E",
Expand Down Expand Up @@ -147,9 +147,12 @@ def _classification_features():
"MSCL",
"ArrayPointing_Elevation",
"ArrayPointing_Azimuth",
"Xcore",
"Ycore",
]
# energy used to bin the models, but not as feature
return var_tel + var_array + ["Erec"]
# size used for sorting events during flattening, but not as feature
return var_tel + var_array + ["Erec", "size"]


def clip_intervals():
Expand Down
2 changes: 1 addition & 1 deletion src/eventdisplay_ml/geomag.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"CTAO-SOUTH": {
"BX": 20.552e-6, # Tesla
"BY": 0.0, # Tesla
"BZ": -9.367 - 6, # Tesla
"BZ": -9.367e-6, # Tesla
},
}

Expand Down
3 changes: 1 addition & 2 deletions src/eventdisplay_ml/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,7 @@ def process_file_chunked(analysis_type, model_configs):
threshold_keys = sorted(
{
eff
for n_tel_models in model_configs["models"].values()
for e_bin_models in n_tel_models.values()
for e_bin_models in model_configs["models"].values()
for eff in (e_bin_models.get("thresholds") or {}).keys()
}
)
Expand Down
2 changes: 1 addition & 1 deletion src/eventdisplay_ml/scripts/train_xgb_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Uses image and stereo parameters to train classification BDTs to separate
gamma-ray events from hadronic background events.

Separate BDTs are trained for 2, 3, and 4 telescope multiplicity events.
Trains a single classifier on events of all telescope multiplicities.
"""

import logging
Expand Down