Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions CDB_study.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,15 @@
#SBATCH --mem=32G
#SBATCH --time=48:00:00
#SBATCH --partition=plgrid-gpu-a100
#SBATCH --array=0-9 # 10 tasks total
#SBATCH -A plgautopt26-gpu-a100
#SBATCH --array=0-11 # 12 tasks total

CDB_VAL=${1:-1.5}
SEED=${1:-42}
CDB_VAL=${2:-1.5}

if [ "$#" -gt 0 ]; then
shift
fi

if [ "$#" -eq 0 ]; then
PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP')
else
PORTFOLIO=("$@")
fi
PORTFOLIO=('G3PCX' 'LMCMAES' 'SPSOL')
PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}")


# CONFIGURATION
ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate"
source "$ENV_PATH"
Expand All @@ -31,37 +24,53 @@ mkdir -p logs
# Array of Dimensions
DIMS=(2 3 5 10)

echo "Initializing Array Job $SLURM_ARRAY_TASK_ID | SEED: $SEED | CDB: $CDB_VAL | PORTFOLIO: ${PORTFOLIO[*]}"

# 1. Dimension-specific CV-LOIO (Indices 0-3)
if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
MODE="CV-LOIO"
DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED

# 2. Dimension-specific CV-LOPO (Indices 4-7)
elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
MODE="CV-LOPO"
DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED

# 3. Multidimensional CV-LOIO (Index 8)
elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then
MODE="CV-LOIO"
echo "Running Mode: $MODE | Multidimensional PG"
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED

# 4. Multidimensional CV-LOPO (Index 9)
elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then
MODE="CV-LOPO"
echo "Running Mode: $MODE | Multidimensional PG"
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED

# 5. Multidimensional CV-LODO (Index 10)
elif [[ $SLURM_ARRAY_TASK_ID -eq 10 ]]; then
MODE="CV-LODO"
echo "Running Mode: $MODE | Multidimensional PG"
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED

# 6. Global Random Agent (Index 11)
elif [[ $SLURM_ARRAY_TASK_ID -eq 11 ]]; then
echo "Running Mode: Global Random Agent"
python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED"

fi
58 changes: 58 additions & 0 deletions comprehensive_study.slurm
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
#SBATCH --job-name=rl_das_experiment
#SBATCH --output=logs/experiment_%A_%a.out
#SBATCH --error=logs/experiment_%A_%a.err
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=32G
#SBATCH --time=48:00:00
#SBATCH --partition=plgrid-gpu-a100
#SBATCH -A plgautopt26-gpu-a100
#SBATCH --array=0-12 # 13 tasks total: 2x4 dimension-specific runs, 4 random-agent runs, 1 baseline run

# 1st argument: SEED (Default: 42)
# FIX: the default was "1.5" (a CDB value copy-pasted from the companion
# CDB_study script). The seed is passed as an integer to "-S", and the
# comment documents 42 as the intended default, so default to 42.
SEED=${1:-42}

# Fixed PORTFOLIO variable: the three algorithms this study selects between.
PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP')

# CONFIGURATION: activate the project virtualenv and make sure the SBATCH
# log directory exists before any output is written.
ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate"
source "$ENV_PATH"
mkdir -p logs

# Problem dimensionalities used by the dimension-specific tasks below.
DIMS=(2 3 5 10)

# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3)
if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
MODE="CV-LOIO"
DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED"

# 2. Dimension-specific CV-LOPO | RL-DAS (Indices 4-7)
elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
MODE="CV-LOPO"
# Array index 4-7 maps onto DIMS index 0-3.
DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED"

# 3. Dimension-specific RL-DAS-random (Indices 8-11)
elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then
# Array index 8-11 maps onto DIMS index 0-3.
DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]}
echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \
-p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED"

# 4. Global Baselines (Index 12)
elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then
echo "Running Mode: Baselines"
python3 dynamicalgorithmselection/main.py BASELINES \
-p "${PORTFOLIO[@]}" --mode baselines -S "$SEED"
fi
66 changes: 24 additions & 42 deletions dynamicalgorithmselection/agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

from dynamicalgorithmselection.agents.agent_reward import AgentReward
from dynamicalgorithmselection.agents.agent_state import (
get_state_representation,
StateNormalizer,
ela_state_representation,
BASE_STATE_SIZE,
AgentState,
)
from dynamicalgorithmselection.agents.agent_utils import (
get_checkpoints,
StepwiseRewardNormalizer,
MAX_DIM,
)
from dynamicalgorithmselection.optimizers.Optimizer import Optimizer
from dynamicalgorithmselection.optimizers.RestartOptimizer import restart_optimizer
Expand Down Expand Up @@ -52,48 +53,28 @@ def __init__(self, problem, options):
self.reward_normalizer = self.options.get(
"reward_normalizer", StepwiseRewardNormalizer(max_steps=self.n_checkpoints)
)
self.state_representation, self.state_dim = get_state_representation(
self.options.get("state_representation", None), len(self.actions)
)
n_actions = len(self.actions)
self.state_dim = BASE_STATE_SIZE + 2 * n_actions
self.state_normalizer = self.options.get(
"state_normalizer", StateNormalizer(input_shape=(self.state_dim,))
)
self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None)
self.reward_method = AgentReward(self.options.get("reward_option", 1))

def get_partial_state(
def get_optimization_state(
self,
x: Optional[np.ndarray],
y: Optional[np.ndarray],
optimization_state: bool = False,
) -> np.ndarray:
sr_additional_params = (
self.lower_boundary,
self.upper_boundary,
state_representation_object = AgentState(
len(self.actions),
self.choices_history,
self.n_checkpoints,
self.ndim_problem,
)

if x is None or y is None:
if self.options.get("state_representation") != "ELA":
state_representation = self.state_representation(
np.zeros((50, self.ndim_problem)),
np.zeros((50,)),
sr_additional_params,
)
else:
state_representation = (np.zeros((43,)),)

return np.append(state_representation, (0, 0) if optimization_state else ())
used_fe = self.n_function_evaluations / self.max_function_evaluations
stagnation_coef = self.stagnation_count / self.max_function_evaluations
if len(self.choices_history) == 0:
return state_representation_object.get_initial_state()

state_representation = self.state_representation(x, y, sr_additional_params)
return np.append(
state_representation,
(used_fe, stagnation_coef) if optimization_state else (),
)
return state_representation_object.get_state()

def get_state(
self,
Expand All @@ -108,20 +89,21 @@ def get_state(
indices = np.sort(indices)
x_history = x_history[indices]
y_history = y_history[indices]

if self.options.get("state_representation") != "ELA":
landscape_state = self.get_partial_state(x_history, y_history).flatten()
optimization_state = self.get_partial_state(x, y, True).flatten()
state = np.concatenate((landscape_state, optimization_state))
landscape_state = ela_state_representation(x_history, y_history)
else:
partial_state = self.get_partial_state(x_history, y_history, True).flatten()
state = np.append(
partial_state,
(
self.ndim_problem / MAX_DIM,
self.n_function_evaluations / self.max_function_evaluations,
),
landscape_state = np.zeros(
43,
)

optimization_state = self.get_optimization_state()
state = np.concatenate((landscape_state, optimization_state))
state = np.append(
state,
(
self.n_function_evaluations / self.max_function_evaluations,
self.stagnation_count / self.max_function_evaluations,
),
)
return self.state_normalizer.normalize(state, update)

def _print_verbose_info(self, fitness, y):
Expand Down
Loading