diff --git a/CDB_study.slurm b/CDB_study.slurm index 628326b..b1bd280 100644 --- a/CDB_study.slurm +++ b/CDB_study.slurm @@ -7,22 +7,15 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-9 # 10 tasks total +#SBATCH -A plgautopt26-gpu-a100 +#SBATCH --array=0-11 # 12 tasks total -CDB_VAL=${1:-1.5} +SEED=${1:-42} +CDB_VAL=${2:-1.5} -if [ "$#" -gt 0 ]; then - shift -fi - -if [ "$#" -eq 0 ]; then - PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') -else - PORTFOLIO=("$@") -fi +PORTFOLIO=('G3PCX' 'LMCMAES' 'SPSOL') PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") - # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" @@ -31,15 +24,17 @@ mkdir -p logs # Array of Dimensions DIMS=(2 3 5 10) +echo "Initializing Array Job $SLURM_ARRAY_TASK_ID | SEED: $SEED | CDB: $CDB_VAL | PORTFOLIO: ${PORTFOLIO[*]}" + # 1. Dimension-specific CV-LOIO (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 2. 
Dimension-specific CV-LOPO (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then @@ -47,21 +42,35 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 3. Multidimensional CV-LOIO (Index 8) elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED # 4. Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED + +# 5. 
Multidimensional CV-LODO (Index 10)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 10 ]]; then
+    MODE="CV-LODO"
+    echo "Running Mode: $MODE | Multidimensional PG"
+    python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED
+
+# 6. Global Random Agent (Index 11)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 11 ]]; then
+    echo "Running Mode: Global Random Agent"
+    python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_RANDOM_CDB${CDB_VAL}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED"
+
 fi
\ No newline at end of file
diff --git a/comprehensive_study.slurm b/comprehensive_study.slurm
new file mode 100644
index 0000000..1384124
--- /dev/null
+++ b/comprehensive_study.slurm
@@ -0,0 +1,58 @@
+#!/bin/bash
+#SBATCH --job-name=rl_das_experiment
+#SBATCH --output=logs/experiment_%A_%a.out
+#SBATCH --error=logs/experiment_%A_%a.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=48:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgautopt26-gpu-a100
+#SBATCH --array=0-12 # Increased to 13 tasks total to split sequential runs
+
+# 1st argument: SEED (Default: 42)
+SEED=${1:-42}
+
+# Fixed PORTFOLIO variable
+PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP')
+
+# CONFIGURATION
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate"
+source "$ENV_PATH"
+mkdir -p logs
+
+# Array of Dimensions
+DIMS=(2 3 5 10)
+
+# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3)
+if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
+    MODE="CV-LOIO"
+    DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
+    echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM"
+
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED"
+
+# 3. 
Dimension-specific CV-LOPO | RL-DAS (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" + +# 5. Dimension-specific RL-DAS-random (Indices 8-11) +elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" + +# 6. Global Baselines (Index 12) +elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then + echo "Running Mode: Baselines" + python3 dynamicalgorithmselection/main.py BASELINES \ + -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" +fi \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index abecf65..2620080 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -4,13 +4,14 @@ from dynamicalgorithmselection.agents.agent_reward import AgentReward from dynamicalgorithmselection.agents.agent_state import ( - get_state_representation, StateNormalizer, + ela_state_representation, + BASE_STATE_SIZE, + AgentState, ) from dynamicalgorithmselection.agents.agent_utils import ( get_checkpoints, StepwiseRewardNormalizer, - MAX_DIM, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer from dynamicalgorithmselection.optimizers.RestartOptimizer import restart_optimizer @@ -52,48 +53,28 @@ def __init__(self, problem, options): self.reward_normalizer = self.options.get( 
"reward_normalizer", StepwiseRewardNormalizer(max_steps=self.n_checkpoints) ) - self.state_representation, self.state_dim = get_state_representation( - self.options.get("state_representation", None), len(self.actions) - ) + n_actions = len(self.actions) + self.state_dim = BASE_STATE_SIZE + 2 * n_actions self.state_normalizer = self.options.get( "state_normalizer", StateNormalizer(input_shape=(self.state_dim,)) ) self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None) self.reward_method = AgentReward(self.options.get("reward_option", 1)) - def get_partial_state( + def get_optimization_state( self, - x: Optional[np.ndarray], - y: Optional[np.ndarray], - optimization_state: bool = False, ) -> np.ndarray: - sr_additional_params = ( - self.lower_boundary, - self.upper_boundary, + state_representation_object = AgentState( + len(self.actions), self.choices_history, self.n_checkpoints, self.ndim_problem, ) - if x is None or y is None: - if self.options.get("state_representation") != "ELA": - state_representation = self.state_representation( - np.zeros((50, self.ndim_problem)), - np.zeros((50,)), - sr_additional_params, - ) - else: - state_representation = (np.zeros((43,)),) - - return np.append(state_representation, (0, 0) if optimization_state else ()) - used_fe = self.n_function_evaluations / self.max_function_evaluations - stagnation_coef = self.stagnation_count / self.max_function_evaluations + if len(self.choices_history) == 0: + return state_representation_object.get_initial_state() - state_representation = self.state_representation(x, y, sr_additional_params) - return np.append( - state_representation, - (used_fe, stagnation_coef) if optimization_state else (), - ) + return state_representation_object.get_state() def get_state( self, @@ -108,20 +89,21 @@ def get_state( indices = np.sort(indices) x_history = x_history[indices] y_history = y_history[indices] - - if self.options.get("state_representation") != "ELA": - landscape_state = 
self.get_partial_state(x_history, y_history).flatten() - optimization_state = self.get_partial_state(x, y, True).flatten() - state = np.concatenate((landscape_state, optimization_state)) + landscape_state = ela_state_representation(x_history, y_history) else: - partial_state = self.get_partial_state(x_history, y_history, True).flatten() - state = np.append( - partial_state, - ( - self.ndim_problem / MAX_DIM, - self.n_function_evaluations / self.max_function_evaluations, - ), + landscape_state = np.zeros( + 43, ) + + optimization_state = self.get_optimization_state() + state = np.concatenate((landscape_state, optimization_state)) + state = np.append( + state, + ( + self.n_function_evaluations / self.max_function_evaluations, + self.stagnation_count / self.max_function_evaluations, + ), + ) return self.state_normalizer.normalize(state, update) def _print_verbose_info(self, fitness, y): diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 3ad9d39..56d2fee 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -1,6 +1,4 @@ import warnings -from operator import itemgetter -from typing import Tuple, Callable, Any, Literal import numpy as np import pandas as pd @@ -15,29 +13,11 @@ from scipy.stats import spearmanr from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd -BASE_STATE_SIZE = 102 +BASE_STATE_SIZE = 48 MAX_CONSIDERED_POPSIZE = 2500 -def get_state_representation( - name: Literal["ELA", "custom"], n_actions: int -) -> Tuple[Callable[[np.ndarray, np.ndarray, Any], np.ndarray], int]: - """ - :param name: name of the state representation mode - :param n_actions: number of actions to take - :return: function used to infer state representation from population and dimensionality of that state representation - """ - if name == "ELA": - return lambda x, y, *args: ela_state_representation(x, y), 47 - elif name == "custom": 
- return lambda x, y, args: AgentState( - x, y, n_actions, *args - ).get_state(), BASE_STATE_SIZE + 2 * n_actions + 2 - else: - raise ValueError("incorrect state representation") - - -def ela_state_representation(x, y, *args): +def ela_state_representation(x, y): with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -88,51 +68,19 @@ def ela_state_representation(x, y, *args): class AgentState: def __init__( self, - x: np.ndarray, - y: np.ndarray, n_actions, - lower_bound, - upper_bound, choice_history, n_checkpoints, n_dim_problem, ): - self.x = x - self.y = y self.n_actions = n_actions self.n_checkpoints = n_checkpoints self.ndim_problem = n_dim_problem - if x is None: - return - - best_idx = y.argmin() - worst_idx = y.argmax() - - self.best_x: np.ndarray = x[best_idx] - self.best_y: float = y[best_idx] - self.worst_x: np.ndarray = x[worst_idx] - self.worst_y: float = y[worst_idx] - self.lower_bound = lower_bound - self.upper_bound = upper_bound + if len(choice_history) < 1: + return # the rest of properties won't be needed self.choice_history = choice_history - self.y_normalized = (y - y.mean()) / (y.std() + 1e-6) - self.max_distance = distance(self.lower_bound, self.upper_bound) - self.sorted_indices = sorted( - [i for i, _ in enumerate(y)], key=lambda i: y[i] - ) # population indices sorted by fitness - self.measured_individuals = list( - itemgetter(*(min(i, len(y) - 1) for i in (1, 2, 3, 4, 5, 6, 9, 12, 15)))( - self.sorted_indices - ) - ) - self.x_mean = x.mean(axis=0) - self.population_relative = x - self.x_mean - self.normalized_x = (x - x.mean(axis=0)) / (x.std() + 1e-8) - self.x_std = 2 * x.std(axis=0) / (self.upper_bound - self.lower_bound) - - self.mean_historic_y = y.mean() self.last_action_index = ( self.choice_history[-1] if self.choice_history else None ) @@ -147,112 +95,6 @@ def __init__( for j in range(self.n_actions) ] - def get_weighted_central_moment(self, n: int): - norms_squared = np.linalg.norm( - self.population_relative, ord=2, 
axis=1 - ) # shape (pop,) - weights = self.get_fitness_weights() - exponent = n / 2 - numerator = min((weights * norms_squared**exponent).sum(), 1e8) - inertia_denom_w = np.linalg.norm(weights) - inertia_denom_n = np.linalg.norm(norms_squared**exponent) - return numerator / max(1e-5, inertia_denom_w * inertia_denom_n) - - def normalized_distance(self, x0: np.ndarray, x1: np.ndarray) -> float: - return float(min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0)) - - def get_fitness_weights(self) -> np.ndarray: - weights = ( - (1.0 - (self.y - self.y.min()) / (self.y.max() - self.y.min())) - if (self.y.max() - self.y.min() > 1e-6) - else np.ones_like(self.y) - ) - return weights / weights.sum() - - def population_relative_radius(self) -> float: - population_radius = np.linalg.norm(self.x.max(axis=0) - self.x.min(axis=0)) - return float(population_radius / self.max_distance) - - def slopes_stats(self) -> tuple: - return get_list_stats( - [ - inverse_scaling( - max(self.y_normalized[j] - self.y_normalized[i], 1) - / (self.normalized_distance(self.x[i], self.x[j]) + 1e-6) - ) - for i, j in zip(self.sorted_indices, self.sorted_indices[1:]) - ] - ) - - def y_difference_stats(self) -> tuple: - return get_list_stats( - [ - min(self.y_normalized[j] - self.y_normalized[i], 1) - for i, j in zip(self.sorted_indices, self.sorted_indices[1:]) - ] - ) - - def distances_from_best(self) -> list: - return [ - self.normalized_distance(self.x[i], self.best_x) - for i in self.measured_individuals + [0, -1] - ] - - def distances_from_mean(self) -> list: - return [ - self.normalized_distance(self.x[i], self.x_mean) - for i in self.measured_individuals + [0, -1] - ] - - def explored_volume(self) -> float: - return np.prod( - (self.x.max(axis=0) - self.x.min(axis=0)) - / (self.upper_bound - self.lower_bound) - ) - - def relative_improvement(self): - return max(0.0, (np.min(self.y) - self.best_y)) / ( - (self.worst_y - self.best_y) or 1.0 - ) - - def normalized_x_stats(self) -> tuple: - 
return ( - np.clip((self.normalized_x**2).mean(), -1, 1), - np.clip((self.normalized_x**2).min(), -1, 1), - np.clip((self.normalized_x**2).max(), -1, 1), - np.clip((self.normalized_x**2).std(), -1, 1), - ) - - def relative_y_differences(self) -> list: - return [ - (self.y[i] - np.min(self.y)) / max((self.worst_y - self.best_y), 1e-6) - for i in self.measured_individuals - ] - - def x_standard_deviation_stats(self) -> tuple: - return ( - self.x_std.max(), - self.x_std.min(), - self.x_std.mean(), - 2 * self.x_std.std(), - ) - - def y_historic_improvement(self) -> float: - return (self.mean_historic_y - self.best_y) / ( - (self.worst_y - self.best_y) or 1 - ) - - def y_deviation(self) -> float: - middle_y = (self.worst_y - self.best_y) / 2 - max_possible_std = self.best_y - middle_y - # dividing twice by std instead of variance due to numerical instability issues - return ( - sum((i - self.mean_historic_y) ** 2 for i in self.y) - / len(self.y) - / max_possible_std - / max_possible_std - ) - def choice_entropy(self) -> float: return -( np.array(self.choices_frequency) @@ -268,57 +110,26 @@ def same_action_counter(self) -> int: break return same_action_counter - def mean_falling_behind(self) -> float: - return (self.y - self.best_y).mean() / ( - max((self.y.max() - self.best_y), (self.y - self.best_y).mean()) or 1 - ) - def get_initial_state(self): vector = [ - 0.0, # third weighted central moment - 0.0, # second weighted central moment - 0.0, # normalized domination of best solution - 0.0, # normalized radius of the smallest sphere containing entire population - 0.5, # normalized relative fitness difference - 0.5, # average_y relative to best - 1.0, # normalized y deviation measure - 1.0, # full remaining budget (max evaluations) - 0.0, # stagnation count - *([0.0] * (51 + 2 * self.n_actions)), - self.ndim_problem / 40, # normalized problem dimension + *(0.0 for _ in range(self.n_actions)), # last action encoded + 0.0, # same action counter + *(0.0 for _ in 
range(self.n_actions)), # choices frequency + 0.0, # choice entropy + self.ndim_problem / MAX_DIM, # normalized problem dimension ] return np.array(vector, dtype=np.float32) - def get_state(self, optimization_status=False) -> np.ndarray: - if len(self.x) < 1: + def get_state(self) -> np.ndarray: + if len(self.choice_history) < 1: return self.get_initial_state() else: vector = [ - self.get_weighted_central_moment(3), - self.get_weighted_central_moment(2), - self.mean_falling_behind(), - self.population_relative_radius(), - self.relative_improvement(), - self.y_historic_improvement(), - self.y_deviation(), - *self.distances_from_best(), - *self.distances_from_mean(), - *self.relative_y_differences(), - *(self.last_action_encoded if optimization_status else ()), - *( - (self.same_action_counter() / self.n_checkpoints,) - if optimization_status - else () - ), - *(self.choices_frequency if optimization_status else ()), - self.explored_volume() ** (1 / self.ndim_problem), # searched volume - *self.x_standard_deviation_stats(), - *self.normalized_x_stats(), - *((self.choice_entropy(),) if optimization_status else ()), - self.normalized_distance(self.best_x, self.worst_x), - *self.y_difference_stats(), - *self.slopes_stats(), - *((self.ndim_problem / MAX_DIM,) if optimization_status else ()), + *self.last_action_encoded, + self.same_action_counter() / self.n_checkpoints, + *self.choices_frequency, + self.choice_entropy(), + self.ndim_problem / MAX_DIM, ] return np.array(vector, dtype=np.float32) @@ -327,19 +138,6 @@ def distance(x0: np.ndarray, x1: np.ndarray) -> float: return float(np.linalg.norm(x0 - x1)) -def inverse_scaling(x): - # Monotonic increacing in [0, inf) function that is bounded in [0, 1) - return x / (x + 5) - - -def get_list_stats(data: list): - return ( - max(data), - min(data), - sum(data) / len(data), - ) - - class StateNormalizer: def __init__(self, input_shape): self.rms = RunningMeanStd(shape=input_shape) diff --git 
a/dynamicalgorithmselection/agents/policy_gradient_agent.py b/dynamicalgorithmselection/agents/policy_gradient_agent.py index e70a2dc..b455965 100644 --- a/dynamicalgorithmselection/agents/policy_gradient_agent.py +++ b/dynamicalgorithmselection/agents/policy_gradient_agent.py @@ -210,14 +210,13 @@ def _collect(self, fitness, y=None): return results, agent_state - def _prepare_state_tensor(self, x, y, full_buffer): + def _prepare_state_tensor(self, x, y): """Generates and normalizes the state tensor using self.iterations_history.""" state = self.get_state( x, y, self.iterations_history["x"], self.iterations_history["y"], - self.train_mode and not full_buffer, ) state = torch.nan_to_num( torch.tensor(state), nan=0.5, neginf=0.0, posinf=1.0 @@ -322,7 +321,7 @@ def optimize(self, fitness_function=None, args=None): full_buffer = self.buffer.size() >= self.buffer.capacity # Prepare State (uses self.iterations_history internally) - state = self._prepare_state_tensor(x, y, full_buffer) + state = self._prepare_state_tensor(x, y) # Select Action action, log_prob, value = self._select_action(state, full_buffer) diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index 6480d1f..a04bc6c 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -105,15 +105,6 @@ def parse_arguments(): help="specify which agent to use", ) - parser.add_argument( - "-r", - "--state-representation", - type=str, - default="ELA", - choices=["ELA", "custom"], - help="specify which state representation to use", - ) - parser.add_argument( "-x", "--cdb", @@ -183,7 +174,6 @@ def print_info(args): print("Weights and Biases project: ", args.wandb_project) print("Agent type: ", args.agent if args.mode != "baselines" else None) print("Exponential checkpoint division base: ", args.cdb) - print("State representation variant: ", args.state_representation) print("Forcing restarts: ", args.force_restarts) print("Dimensionality of problems: ", 
args.dimensionality) print("Number of training epochs: ", args.n_epochs) @@ -195,7 +185,6 @@ def common_options(args) -> Dict[str, Any]: "n_checkpoints": args.n_checkpoints, "n_individuals": args.population_size, "cdb": args.cdb, - "state_representation": args.state_representation, "force_restarts": args.force_restarts, "dimensionality": args.dimensionality, "n_epochs": args.n_epochs, diff --git a/portfolio_study.slurm b/portfolio_study.slurm index b66c396..7a23fe8 100644 --- a/portfolio_study.slurm +++ b/portfolio_study.slurm @@ -10,12 +10,18 @@ #SBATCH -A plgautopt26-gpu-a100 #SBATCH --array=0-9 # 10 tasks total -CDB_VAL=1.5 +CDB_VAL=1.2 -PORTFOLIO=('MADDE' 'CMAES' 'SPSO') +SEED=${1:-42} -PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") +# If no additional arguments are passed, use the default portfolio +if [ "$#" -lt 2 ]; then + PORTFOLIO=('G3PCX' 'LMCMAES' 'SPSO') +else + PORTFOLIO=("${@:2}") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" @@ -25,15 +31,17 @@ mkdir -p logs # Array of Dimensions DIMS=(2 3 5 10) +echo "Initializing Array Job $SLURM_ARRAY_TASK_ID | SEED: $SEED | PORTFOLIO: ${PORTFOLIO[*]}" + # 1. Dimension-specific CV-LOIO (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_$_CDB${CDB_VAL}_DIM${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 2. 
Dimension-specific CV-LOPO (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then @@ -41,21 +49,21 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 3. Multidimensional CV-LOIO (Index 8) elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient --dimensionality 2 3 5 10 -S $SEED # 4. 
Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient --dimensionality 2 3 5 10 -S $SEED fi \ No newline at end of file diff --git a/runner.sh b/runner.sh new file mode 100644 index 0000000..2d88205 --- /dev/null +++ b/runner.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +SEEDS=(123 234 345 456) + +CDB_VALUES=(1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7) + +PORTFOLIOS=( + "CMAES SPSOL OPOA2015" + "G3PCX LMCMAES SPSO" + "MADDE JDE21 NL_SHADE_RSP" +) + +echo "Starting Cartesian product job submissions..." + +for SEED in "${SEEDS[@]}"; do + for PORTFOLIO in "${PORTFOLIOS[@]}"; do + + echo "Submitting portfolio study with: SEED=${SEED} | PORTFOLIO=${PORTFOLIO}" + + sbatch portfolio_study.slurm $SEED $PORTFOLIO + + sleep 1 + + done + + for CDB in "${CDB_VALUES[@]}"; do + + echo "Submitting single algorithm study with: SEED=${SEED} | CDB=${CDB}" + sbatch single_algorithm_CDB_study.slurm $SEED $CDB + echo "Submitting CDB study with: SEED=${SEED} | CDB=${CDB}" + sbatch CDB_study.slurm $SEED $CDB + + sleep 1 + + done + + echo "Submitting comprehensive study with: SEED=${SEED}" + sbatch comprehensive_study.slurm $SEED + +done + +echo "All jobs submitted!" 
\ No newline at end of file diff --git a/runner.slurm b/runner.slurm deleted file mode 100644 index 12c7916..0000000 --- a/runner.slurm +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=rl_das_experiment -#SBATCH --output=logs/experiment_%A_%a.out -#SBATCH --error=logs/experiment_%A_%a.err -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem=32G -#SBATCH --time=48:00:00 -#SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 -#SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs - -# 1st argument: CDB_VAL (Default: 1.5) -CDB_VAL=${1:-1.5} - -# 2nd argument: SEED (Default: 42) -SEED=${2:-42} - -# Fixed PORTFOLIO variable -PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') - -# CONFIGURATION -ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" -source "$ENV_PATH" -mkdir -p logs - -# Array of Dimensions -DIMS=(2 3 5 10) - -# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) -if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then - MODE="CV-LOIO" - DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} - echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" - -# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) -elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then - MODE="CV-LOIO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" - -# 3. 
Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) -elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then - MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} - echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" - -# 4. Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) -elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then - MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" - -# 5. Dimension-specific RL-DAS-random (Indices 16-19) -elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} - echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" - -# 6. Multidimensional CV-LOIO (Index 20) -elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then - MODE="CV-LOIO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" - -# 7. 
Multidimensional CV-LOPO (Index 21) -elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then - MODE="CV-LOPO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" - -# 8. Global Random Agent (Index 22) -elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then - echo "Running Mode: Global Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED" - -# 9. Global Baselines (Index 23) -elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then - echo "Running Mode: Baselines" - python3 dynamicalgorithmselection/main.py BASELINES \ - -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" - -# 10. Multidimensional CV-LODO (Index 24) -elif [[ $SLURM_ARRAY_TASK_ID -eq 24 ]]; then - MODE="CV-LOPO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" -fi \ No newline at end of file diff --git a/single_algorithm_CDB_study.slurm b/single_algorithm_CDB_study.slurm index 81512a5..156c626 100644 --- a/single_algorithm_CDB_study.slurm +++ b/single_algorithm_CDB_study.slurm @@ -8,27 +8,23 @@ #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 #SBATCH -A plgautopt26-gpu-a100 -#SBATCH --array=0-9 # 10 tasks total -CDB_VAL=1.5 - -PORTFOLIO=('MADDE') +SEED=${1:-42} +CDB_VAL=${2:-1.5} +PORTFOLIO=('G3PCX') PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") - # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" mkdir -p logs -# Array of Dimensions -DIMS=(2 3 5 10) - +DIM=10 MODE="CV-LOPO" -DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} -echo "Running Mode: $MODE 
| Dimension: $DIM" -python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM} \ +echo "Running Mode: $MODE | Dimension: $DIM | CDB: $CDB_VAL | SEED: $SEED" + +python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient --seed $SEED \ No newline at end of file