diff --git a/CDB_study.slurm b/CDB_study.slurm index 628326b..b1bd280 100644 --- a/CDB_study.slurm +++ b/CDB_study.slurm @@ -7,22 +7,15 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-9 # 10 tasks total +#SBATCH -A plgautopt26-gpu-a100 +#SBATCH --array=0-11 # 12 tasks total -CDB_VAL=${1:-1.5} +SEED=${1:-42} +CDB_VAL=${2:-1.5} -if [ "$#" -gt 0 ]; then - shift -fi - -if [ "$#" -eq 0 ]; then - PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') -else - PORTFOLIO=("$@") -fi +PORTFOLIO=('G3PCX' 'LMCMAES' 'SPSOL') PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") - # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" @@ -31,15 +24,17 @@ mkdir -p logs # Array of Dimensions DIMS=(2 3 5 10) +echo "Initializing Array Job $SLURM_ARRAY_TASK_ID | SEED: $SEED | CDB: $CDB_VAL | PORTFOLIO: ${PORTFOLIO[*]}" + # 1. Dimension-specific CV-LOIO (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 2. 
Dimension-specific CV-LOPO (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then @@ -47,21 +42,35 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 3. Multidimensional CV-LOIO (Index 8) elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED # 4. Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED + +# 5. 
Multidimensional CV-LODO (Index 10)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 10 ]]; then
+    MODE="CV-LODO"
+    echo "Running Mode: $MODE | Multidimensional PG"
+    python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S $SEED
+
+# 6. Global Random Agent (Index 11)
+elif [[ $SLURM_ARRAY_TASK_ID -eq 11 ]]; then
+    echo "Running Mode: Global Random Agent"
+    python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_RANDOM_CDB${CDB_VAL}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED"
+
 fi
\ No newline at end of file
diff --git a/comprehensive_study.slurm b/comprehensive_study.slurm
new file mode 100644
index 0000000..1384124
--- /dev/null
+++ b/comprehensive_study.slurm
@@ -0,0 +1,58 @@
+#!/bin/bash
+#SBATCH --job-name=rl_das_experiment
+#SBATCH --output=logs/experiment_%A_%a.out
+#SBATCH --error=logs/experiment_%A_%a.err
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem=32G
+#SBATCH --time=48:00:00
+#SBATCH --partition=plgrid-gpu-a100
+#SBATCH -A plgautopt26-gpu-a100
+#SBATCH --array=0-12 # Increased to 13 tasks total to split sequential runs
+
+# 1st argument: SEED (Default: 42)
+SEED=${1:-42}
+
+# Fixed PORTFOLIO variable
+PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP')
+
+# CONFIGURATION
+ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate"
+source "$ENV_PATH"
+mkdir -p logs
+
+# Array of Dimensions
+DIMS=(2 3 5 10)
+
+# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3)
+if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
+    MODE="CV-LOIO"
+    DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
+    echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM"
+
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \
+        -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED"
+
+# 3. 
Dimension-specific CV-LOPO | RL-DAS (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" + +# 5. Dimension-specific RL-DAS-random (Indices 8-11) +elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" + +# 6. Global Baselines (Index 12) +elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then + echo "Running Mode: Baselines" + python3 dynamicalgorithmselection/main.py BASELINES \ + -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" +fi \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index abecf65..2620080 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -4,13 +4,14 @@ from dynamicalgorithmselection.agents.agent_reward import AgentReward from dynamicalgorithmselection.agents.agent_state import ( - get_state_representation, StateNormalizer, + ela_state_representation, + BASE_STATE_SIZE, + AgentState, ) from dynamicalgorithmselection.agents.agent_utils import ( get_checkpoints, StepwiseRewardNormalizer, - MAX_DIM, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer from dynamicalgorithmselection.optimizers.RestartOptimizer import restart_optimizer @@ -52,48 +53,28 @@ def __init__(self, problem, options): self.reward_normalizer = self.options.get( 
"reward_normalizer", StepwiseRewardNormalizer(max_steps=self.n_checkpoints) ) - self.state_representation, self.state_dim = get_state_representation( - self.options.get("state_representation", None), len(self.actions) - ) + n_actions = len(self.actions) + self.state_dim = BASE_STATE_SIZE + 2 * n_actions self.state_normalizer = self.options.get( "state_normalizer", StateNormalizer(input_shape=(self.state_dim,)) ) self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None) self.reward_method = AgentReward(self.options.get("reward_option", 1)) - def get_partial_state( + def get_optimization_state( self, - x: Optional[np.ndarray], - y: Optional[np.ndarray], - optimization_state: bool = False, ) -> np.ndarray: - sr_additional_params = ( - self.lower_boundary, - self.upper_boundary, + state_representation_object = AgentState( + len(self.actions), self.choices_history, self.n_checkpoints, self.ndim_problem, ) - if x is None or y is None: - if self.options.get("state_representation") != "ELA": - state_representation = self.state_representation( - np.zeros((50, self.ndim_problem)), - np.zeros((50,)), - sr_additional_params, - ) - else: - state_representation = (np.zeros((43,)),) - - return np.append(state_representation, (0, 0) if optimization_state else ()) - used_fe = self.n_function_evaluations / self.max_function_evaluations - stagnation_coef = self.stagnation_count / self.max_function_evaluations + if len(self.choices_history) == 0: + return state_representation_object.get_initial_state() - state_representation = self.state_representation(x, y, sr_additional_params) - return np.append( - state_representation, - (used_fe, stagnation_coef) if optimization_state else (), - ) + return state_representation_object.get_state() def get_state( self, @@ -108,20 +89,21 @@ def get_state( indices = np.sort(indices) x_history = x_history[indices] y_history = y_history[indices] - - if self.options.get("state_representation") != "ELA": - landscape_state = 
self.get_partial_state(x_history, y_history).flatten() - optimization_state = self.get_partial_state(x, y, True).flatten() - state = np.concatenate((landscape_state, optimization_state)) + landscape_state = ela_state_representation(x_history, y_history) else: - partial_state = self.get_partial_state(x_history, y_history, True).flatten() - state = np.append( - partial_state, - ( - self.ndim_problem / MAX_DIM, - self.n_function_evaluations / self.max_function_evaluations, - ), + landscape_state = np.zeros( + 43, ) + + optimization_state = self.get_optimization_state() + state = np.concatenate((landscape_state, optimization_state)) + state = np.append( + state, + ( + self.n_function_evaluations / self.max_function_evaluations, + self.stagnation_count / self.max_function_evaluations, + ), + ) return self.state_normalizer.normalize(state, update) def _print_verbose_info(self, fitness, y): diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 3ad9d39..56d2fee 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -1,6 +1,4 @@ import warnings -from operator import itemgetter -from typing import Tuple, Callable, Any, Literal import numpy as np import pandas as pd @@ -15,29 +13,11 @@ from scipy.stats import spearmanr from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd -BASE_STATE_SIZE = 102 +BASE_STATE_SIZE = 48 MAX_CONSIDERED_POPSIZE = 2500 -def get_state_representation( - name: Literal["ELA", "custom"], n_actions: int -) -> Tuple[Callable[[np.ndarray, np.ndarray, Any], np.ndarray], int]: - """ - :param name: name of the state representation mode - :param n_actions: number of actions to take - :return: function used to infer state representation from population and dimensionality of that state representation - """ - if name == "ELA": - return lambda x, y, *args: ela_state_representation(x, y), 47 - elif name == "custom": 
- return lambda x, y, args: AgentState( - x, y, n_actions, *args - ).get_state(), BASE_STATE_SIZE + 2 * n_actions + 2 - else: - raise ValueError("incorrect state representation") - - -def ela_state_representation(x, y, *args): +def ela_state_representation(x, y): with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -88,51 +68,19 @@ def ela_state_representation(x, y, *args): class AgentState: def __init__( self, - x: np.ndarray, - y: np.ndarray, n_actions, - lower_bound, - upper_bound, choice_history, n_checkpoints, n_dim_problem, ): - self.x = x - self.y = y self.n_actions = n_actions self.n_checkpoints = n_checkpoints self.ndim_problem = n_dim_problem - if x is None: - return - - best_idx = y.argmin() - worst_idx = y.argmax() - - self.best_x: np.ndarray = x[best_idx] - self.best_y: float = y[best_idx] - self.worst_x: np.ndarray = x[worst_idx] - self.worst_y: float = y[worst_idx] - self.lower_bound = lower_bound - self.upper_bound = upper_bound + if len(choice_history) < 1: + return # the rest of properties won't be needed self.choice_history = choice_history - self.y_normalized = (y - y.mean()) / (y.std() + 1e-6) - self.max_distance = distance(self.lower_bound, self.upper_bound) - self.sorted_indices = sorted( - [i for i, _ in enumerate(y)], key=lambda i: y[i] - ) # population indices sorted by fitness - self.measured_individuals = list( - itemgetter(*(min(i, len(y) - 1) for i in (1, 2, 3, 4, 5, 6, 9, 12, 15)))( - self.sorted_indices - ) - ) - self.x_mean = x.mean(axis=0) - self.population_relative = x - self.x_mean - self.normalized_x = (x - x.mean(axis=0)) / (x.std() + 1e-8) - self.x_std = 2 * x.std(axis=0) / (self.upper_bound - self.lower_bound) - - self.mean_historic_y = y.mean() self.last_action_index = ( self.choice_history[-1] if self.choice_history else None ) @@ -147,112 +95,6 @@ def __init__( for j in range(self.n_actions) ] - def get_weighted_central_moment(self, n: int): - norms_squared = np.linalg.norm( - self.population_relative, ord=2, 
axis=1 - ) # shape (pop,) - weights = self.get_fitness_weights() - exponent = n / 2 - numerator = min((weights * norms_squared**exponent).sum(), 1e8) - inertia_denom_w = np.linalg.norm(weights) - inertia_denom_n = np.linalg.norm(norms_squared**exponent) - return numerator / max(1e-5, inertia_denom_w * inertia_denom_n) - - def normalized_distance(self, x0: np.ndarray, x1: np.ndarray) -> float: - return float(min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0)) - - def get_fitness_weights(self) -> np.ndarray: - weights = ( - (1.0 - (self.y - self.y.min()) / (self.y.max() - self.y.min())) - if (self.y.max() - self.y.min() > 1e-6) - else np.ones_like(self.y) - ) - return weights / weights.sum() - - def population_relative_radius(self) -> float: - population_radius = np.linalg.norm(self.x.max(axis=0) - self.x.min(axis=0)) - return float(population_radius / self.max_distance) - - def slopes_stats(self) -> tuple: - return get_list_stats( - [ - inverse_scaling( - max(self.y_normalized[j] - self.y_normalized[i], 1) - / (self.normalized_distance(self.x[i], self.x[j]) + 1e-6) - ) - for i, j in zip(self.sorted_indices, self.sorted_indices[1:]) - ] - ) - - def y_difference_stats(self) -> tuple: - return get_list_stats( - [ - min(self.y_normalized[j] - self.y_normalized[i], 1) - for i, j in zip(self.sorted_indices, self.sorted_indices[1:]) - ] - ) - - def distances_from_best(self) -> list: - return [ - self.normalized_distance(self.x[i], self.best_x) - for i in self.measured_individuals + [0, -1] - ] - - def distances_from_mean(self) -> list: - return [ - self.normalized_distance(self.x[i], self.x_mean) - for i in self.measured_individuals + [0, -1] - ] - - def explored_volume(self) -> float: - return np.prod( - (self.x.max(axis=0) - self.x.min(axis=0)) - / (self.upper_bound - self.lower_bound) - ) - - def relative_improvement(self): - return max(0.0, (np.min(self.y) - self.best_y)) / ( - (self.worst_y - self.best_y) or 1.0 - ) - - def normalized_x_stats(self) -> tuple: - 
return ( - np.clip((self.normalized_x**2).mean(), -1, 1), - np.clip((self.normalized_x**2).min(), -1, 1), - np.clip((self.normalized_x**2).max(), -1, 1), - np.clip((self.normalized_x**2).std(), -1, 1), - ) - - def relative_y_differences(self) -> list: - return [ - (self.y[i] - np.min(self.y)) / max((self.worst_y - self.best_y), 1e-6) - for i in self.measured_individuals - ] - - def x_standard_deviation_stats(self) -> tuple: - return ( - self.x_std.max(), - self.x_std.min(), - self.x_std.mean(), - 2 * self.x_std.std(), - ) - - def y_historic_improvement(self) -> float: - return (self.mean_historic_y - self.best_y) / ( - (self.worst_y - self.best_y) or 1 - ) - - def y_deviation(self) -> float: - middle_y = (self.worst_y - self.best_y) / 2 - max_possible_std = self.best_y - middle_y - # dividing twice by std instead of variance due to numerical instability issues - return ( - sum((i - self.mean_historic_y) ** 2 for i in self.y) - / len(self.y) - / max_possible_std - / max_possible_std - ) - def choice_entropy(self) -> float: return -( np.array(self.choices_frequency) @@ -268,57 +110,26 @@ def same_action_counter(self) -> int: break return same_action_counter - def mean_falling_behind(self) -> float: - return (self.y - self.best_y).mean() / ( - max((self.y.max() - self.best_y), (self.y - self.best_y).mean()) or 1 - ) - def get_initial_state(self): vector = [ - 0.0, # third weighted central moment - 0.0, # second weighted central moment - 0.0, # normalized domination of best solution - 0.0, # normalized radius of the smallest sphere containing entire population - 0.5, # normalized relative fitness difference - 0.5, # average_y relative to best - 1.0, # normalized y deviation measure - 1.0, # full remaining budget (max evaluations) - 0.0, # stagnation count - *([0.0] * (51 + 2 * self.n_actions)), - self.ndim_problem / 40, # normalized problem dimension + *(0.0 for _ in range(self.n_actions)), # last action encoded + 0.0, # same action counter + *(0.0 for _ in 
range(self.n_actions)), # choices frequency + 0.0, # choice entropy + self.ndim_problem / MAX_DIM, # normalized problem dimension ] return np.array(vector, dtype=np.float32) - def get_state(self, optimization_status=False) -> np.ndarray: - if len(self.x) < 1: + def get_state(self) -> np.ndarray: + if len(self.choice_history) < 1: return self.get_initial_state() else: vector = [ - self.get_weighted_central_moment(3), - self.get_weighted_central_moment(2), - self.mean_falling_behind(), - self.population_relative_radius(), - self.relative_improvement(), - self.y_historic_improvement(), - self.y_deviation(), - *self.distances_from_best(), - *self.distances_from_mean(), - *self.relative_y_differences(), - *(self.last_action_encoded if optimization_status else ()), - *( - (self.same_action_counter() / self.n_checkpoints,) - if optimization_status - else () - ), - *(self.choices_frequency if optimization_status else ()), - self.explored_volume() ** (1 / self.ndim_problem), # searched volume - *self.x_standard_deviation_stats(), - *self.normalized_x_stats(), - *((self.choice_entropy(),) if optimization_status else ()), - self.normalized_distance(self.best_x, self.worst_x), - *self.y_difference_stats(), - *self.slopes_stats(), - *((self.ndim_problem / MAX_DIM,) if optimization_status else ()), + *self.last_action_encoded, + self.same_action_counter() / self.n_checkpoints, + *self.choices_frequency, + self.choice_entropy(), + self.ndim_problem / MAX_DIM, ] return np.array(vector, dtype=np.float32) @@ -327,19 +138,6 @@ def distance(x0: np.ndarray, x1: np.ndarray) -> float: return float(np.linalg.norm(x0 - x1)) -def inverse_scaling(x): - # Monotonic increacing in [0, inf) function that is bounded in [0, 1) - return x / (x + 5) - - -def get_list_stats(data: list): - return ( - max(data), - min(data), - sum(data) / len(data), - ) - - class StateNormalizer: def __init__(self, input_shape): self.rms = RunningMeanStd(shape=input_shape) diff --git 
a/dynamicalgorithmselection/agents/policy_gradient_agent.py b/dynamicalgorithmselection/agents/policy_gradient_agent.py index e70a2dc..b455965 100644 --- a/dynamicalgorithmselection/agents/policy_gradient_agent.py +++ b/dynamicalgorithmselection/agents/policy_gradient_agent.py @@ -210,14 +210,13 @@ def _collect(self, fitness, y=None): return results, agent_state - def _prepare_state_tensor(self, x, y, full_buffer): + def _prepare_state_tensor(self, x, y): """Generates and normalizes the state tensor using self.iterations_history.""" state = self.get_state( x, y, self.iterations_history["x"], self.iterations_history["y"], - self.train_mode and not full_buffer, ) state = torch.nan_to_num( torch.tensor(state), nan=0.5, neginf=0.0, posinf=1.0 @@ -322,7 +321,7 @@ def optimize(self, fitness_function=None, args=None): full_buffer = self.buffer.size() >= self.buffer.capacity # Prepare State (uses self.iterations_history internally) - state = self._prepare_state_tensor(x, y, full_buffer) + state = self._prepare_state_tensor(x, y) # Select Action action, log_prob, value = self._select_action(state, full_buffer) diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index 6480d1f..a04bc6c 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -105,15 +105,6 @@ def parse_arguments(): help="specify which agent to use", ) - parser.add_argument( - "-r", - "--state-representation", - type=str, - default="ELA", - choices=["ELA", "custom"], - help="specify which state representation to use", - ) - parser.add_argument( "-x", "--cdb", @@ -183,7 +174,6 @@ def print_info(args): print("Weights and Biases project: ", args.wandb_project) print("Agent type: ", args.agent if args.mode != "baselines" else None) print("Exponential checkpoint division base: ", args.cdb) - print("State representation variant: ", args.state_representation) print("Forcing restarts: ", args.force_restarts) print("Dimensionality of problems: ", 
args.dimensionality) print("Number of training epochs: ", args.n_epochs) @@ -195,7 +185,6 @@ def common_options(args) -> Dict[str, Any]: "n_checkpoints": args.n_checkpoints, "n_individuals": args.population_size, "cdb": args.cdb, - "state_representation": args.state_representation, "force_restarts": args.force_restarts, "dimensionality": args.dimensionality, "n_epochs": args.n_epochs, diff --git a/portfolio_study.slurm b/portfolio_study.slurm index b66c396..7a23fe8 100644 --- a/portfolio_study.slurm +++ b/portfolio_study.slurm @@ -10,12 +10,18 @@ #SBATCH -A plgautopt26-gpu-a100 #SBATCH --array=0-9 # 10 tasks total -CDB_VAL=1.5 +CDB_VAL=1.2 -PORTFOLIO=('MADDE' 'CMAES' 'SPSO') +SEED=${1:-42} -PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") +# If no additional arguments are passed, use the default portfolio +if [ "$#" -lt 2 ]; then + PORTFOLIO=('G3PCX' 'LMCMAES' 'SPSO') +else + PORTFOLIO=("${@:2}") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" @@ -25,15 +31,17 @@ mkdir -p logs # Array of Dimensions DIMS=(2 3 5 10) +echo "Initializing Array Job $SLURM_ARRAY_TASK_ID | SEED: $SEED | PORTFOLIO: ${PORTFOLIO[*]}" + # 1. Dimension-specific CV-LOIO (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_$_CDB${CDB_VAL}_DIM${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 2. 
Dimension-specific CV-LOPO (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then @@ -41,21 +49,21 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S $SEED # 3. Multidimensional CV-LOIO (Index 8) elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient --dimensionality 2 3 5 10 -S $SEED # 4. 
Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_CDB${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient --dimensionality 2 3 5 10 -S $SEED fi \ No newline at end of file diff --git a/runner.sh b/runner.sh new file mode 100644 index 0000000..2d88205 --- /dev/null +++ b/runner.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +SEEDS=(123 234 345 456) + +CDB_VALUES=(1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7) + +PORTFOLIOS=( + "CMAES SPSOL OPOA2015" + "G3PCX LMCMAES SPSO" + "MADDE JDE21 NL_SHADE_RSP" +) + +echo "Starting Cartesian product job submissions..." + +for SEED in "${SEEDS[@]}"; do + for PORTFOLIO in "${PORTFOLIOS[@]}"; do + + echo "Submitting portfolio study with: SEED=${SEED} | PORTFOLIO=${PORTFOLIO}" + + sbatch portfolio_study.slurm $SEED $PORTFOLIO + + sleep 1 + + done + + for CDB in "${CDB_VALUES[@]}"; do + + echo "Submitting single algorithm study with: SEED=${SEED} | CDB=${CDB}" + sbatch single_algorithm_CDB_study.slurm $SEED $CDB + echo "Submitting CDB study with: SEED=${SEED} | CDB=${CDB}" + sbatch CDB_study.slurm $SEED $CDB + + sleep 1 + + done + + echo "Submitting comprehensive study with: SEED=${SEED}" + sbatch comprehensive_study.slurm $SEED + +done + +echo "All jobs submitted!" 
\ No newline at end of file diff --git a/runner.slurm b/runner.slurm deleted file mode 100644 index 12c7916..0000000 --- a/runner.slurm +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=rl_das_experiment -#SBATCH --output=logs/experiment_%A_%a.out -#SBATCH --error=logs/experiment_%A_%a.err -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem=32G -#SBATCH --time=48:00:00 -#SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 -#SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs - -# 1st argument: CDB_VAL (Default: 1.5) -CDB_VAL=${1:-1.5} - -# 2nd argument: SEED (Default: 42) -SEED=${2:-42} - -# Fixed PORTFOLIO variable -PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') - -# CONFIGURATION -ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" -source "$ENV_PATH" -mkdir -p logs - -# Array of Dimensions -DIMS=(2 3 5 10) - -# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) -if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then - MODE="CV-LOIO" - DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} - echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" - -# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) -elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then - MODE="CV-LOIO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" - -# 3. 
Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) -elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then - MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} - echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" - -# 4. Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) -elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then - MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" - -# 5. Dimension-specific RL-DAS-random (Indices 16-19) -elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} - echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" - -# 6. Multidimensional CV-LOIO (Index 20) -elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then - MODE="CV-LOIO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" - -# 7. 
Multidimensional CV-LOPO (Index 21) -elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then - MODE="CV-LOPO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" - -# 8. Global Random Agent (Index 22) -elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then - echo "Running Mode: Global Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED" - -# 9. Global Baselines (Index 23) -elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then - echo "Running Mode: Baselines" - python3 dynamicalgorithmselection/main.py BASELINES \ - -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" - -# 10. Multidimensional CV-LODO (Index 24) -elif [[ $SLURM_ARRAY_TASK_ID -eq 24 ]]; then - MODE="CV-LOPO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" -fi \ No newline at end of file diff --git a/single_algorithm_CDB_study.slurm b/single_algorithm_CDB_study.slurm index 81512a5..156c626 100644 --- a/single_algorithm_CDB_study.slurm +++ b/single_algorithm_CDB_study.slurm @@ -8,27 +8,23 @@ #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 #SBATCH -A plgautopt26-gpu-a100 -#SBATCH --array=0-9 # 10 tasks total -CDB_VAL=1.5 - -PORTFOLIO=('MADDE') +SEED=${1:-42} +CDB_VAL=${2:-1.5} +PORTFOLIO=('G3PCX') PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") - # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" mkdir -p logs -# Array of Dimensions -DIMS=(2 3 5 10) - +DIM=10 MODE="CV-LOPO" -DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} -echo "Running Mode: $MODE 
| Dimension: $DIM" -python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM} \ +echo "Running Mode: $MODE | Dimension: $DIM | CDB: $CDB_VAL | SEED: $SEED" + +python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_CDB${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient --seed $SEED \ No newline at end of file