From de90dc9bd3b2ff0d929531361fb0db22ee2ee502 Mon Sep 17 00:00:00 2001 From: wniec Date: Fri, 27 Feb 2026 19:33:13 +0100 Subject: [PATCH 1/3] minor fixes in runners --- README.md | 2 +- .../agents/RLDAS_agent.py | 1 - .../agents/agent_utils.py | 4 -- reward_study.slurm | 67 +++++++++++++++++++ runner.slurm | 6 +- 5 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 reward_study.slurm diff --git a/README.md b/README.md index 5469d65..323a285 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ uv run das [options] | `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. | | `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. | | `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. | -| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. | +| `-D`, `--dimensionality` | `list[int]` | `[2, 3, 5, 10, 20, 40]` | Dimensionality of problems. | | `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. | | `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. 
| diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index f02bc47..7cfc97a 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -272,7 +272,6 @@ def optimize(self, fitness_function=None, args=None): self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) - print(self._n_generations) fes_end = self.n_function_evaluations speed_factor = self.max_function_evaluations / fes_end diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 0bb2f23..019eefa 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np MAX_DIM = 40 @@ -14,7 +12,6 @@ def get_runtime_stats( """ :param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved :param function_evaluations: max number of function evaluations during run. - :param checkpoints: list of checkpoints by their n_function_evaluations :return: dictionary of selected run statistics, ready to dump """ area_under_optimization_curve = 0.0 @@ -43,7 +40,6 @@ def get_extreme_stats( """ :param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm :param function_evaluations: max number of function evaluations during run. 
- :param checkpoints: list of checkpoints by their n_function_evaluations :return: dictionary of selected run statistics, ready to dump """ all_improvements = [] diff --git a/reward_study.slurm b/reward_study.slurm new file mode 100644 index 0000000..ff7ac0f --- /dev/null +++ b/reward_study.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. 
Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/runner.slurm b/runner.slurm index d0bbca4..7410b16 100644 --- a/runner.slurm +++ b/runner.slurm @@ -63,7 +63,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient # 5. Dimension-specific RL-DAS-random (Indices 16-19) @@ -79,14 +79,14 @@ elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient # 7. 
Multidimensional CV-LOPO (Index 21) elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient # 8. Global Random Agent (Index 22) elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then From 1c770aaf620df0476c6529e68eea907b7716bb88 Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 3 Mar 2026 22:36:49 +0100 Subject: [PATCH 2/3] new experiment variants --- CDB_study.slurm | 4 +-- portfolio_study.slurm | 21 +++++------ runner.slurm | 54 ++++++++++++++-------------- single_algorithm_CDB_study.slurm | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 43 deletions(-) create mode 100644 single_algorithm_CDB_study.slurm diff --git a/CDB_study.slurm b/CDB_study.slurm index ff7ac0f..628326b 100644 --- a/CDB_study.slurm +++ b/CDB_study.slurm @@ -37,7 +37,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -47,7 +47,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 
--agent policy-gradient diff --git a/portfolio_study.slurm b/portfolio_study.slurm index 4282edd..d320d2f 100644 --- a/portfolio_study.slurm +++ b/portfolio_study.slurm @@ -9,17 +9,12 @@ #SBATCH --partition=plgrid-gpu-a100 #SBATCH --array=0-9 # 10 tasks total -CDB_VAL=${1:-1.5} +REWARD_OPTION=${1:-1} -if [ "$#" -gt 0 ]; then - shift -fi +CDB_VAL=1.5 + +PORTFOLIO=('MADDE' 'CMAES' 'SPSO') -if [ "$#" -eq 0 ]; then - PORTFOLIO=('MADDE' 'CMAES' 'SPSO') -else - PORTFOLIO=("$@") -fi PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") @@ -37,7 +32,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_REWARD_${REWARD_OPTION}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -47,7 +42,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_REWARD_${REWARD_OPTION}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -55,13 +50,13 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_REWARD_${REWARD_OPTION} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent
policy-gradient # 4. Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_REWARD_${REWARD_OPTION} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient fi \ No newline at end of file diff --git a/runner.slurm b/runner.slurm index 7410b16..9aeb115 100644 --- a/runner.slurm +++ b/runner.slurm @@ -9,15 +9,13 @@ #SBATCH --partition=plgrid-gpu-a100 #SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs +# 1st argument: CDB_VAL (Default: 1.5) CDB_VAL=${1:-1.5} -if [ "$#" -gt 0 ]; then - shift -fi - -# Store the remaining arguments as an array called PORTFOLIO. -# If no additional arguments were provided, fall back to your default. +# 2nd argument: SEED (Default: 42) +SEED=${2:-42} +# Fixed PORTFOLIO variable PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') # CONFIGURATION @@ -31,72 +29,72 @@ DIMS=(2 3 5 10) # 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" - DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" # 2.
Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then MODE="CV-LOIO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" # 3. Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" # 4.
Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" # 5. Dimension-specific RL-DAS-random (Indices 16-19) elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent RL-DAS-random --dimensionality $DIM + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" # 6. Multidimensional CV-LOIO (Index 20) elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" # 7.
Multidimensional CV-LOPO (Index 21) elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" # 8. Global Random Agent (Index 22) elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then echo "Running Mode: Global Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED" # 9. Global Baselines (Index 23) elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then echo "Running Mode: Baselines" python3 dynamicalgorithmselection/main.py BASELINES \ - -p "${PORTFOLIO[@]}" --mode baselines + -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" fi \ No newline at end of file diff --git a/single_algorithm_CDB_study.slurm b/single_algorithm_CDB_study.slurm new file mode 100644 index 0000000..c7c0448 --- /dev/null +++ b/single_algorithm_CDB_study.slurm @@ -0,0 +1,62 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +REWARD_OPTION=${1:-1} + +CDB_VAL=1.5 + +PORTFOLIO=('MADDE') + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array 
of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_DIM${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_DIM${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. 
Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file From add23f995b6ad573e5ecf7d819047ff1321f6b2c Mon Sep 17 00:00:00 2001 From: wniec Date: Thu, 5 Mar 2026 00:41:03 +0100 Subject: [PATCH 3/3] fix RL-DAS implementation --- .../agents/RLDAS_agent.py | 46 ++++---- .../agents/RLDAS_random_agent.py | 9 -- .../agents/agent_state.py | 19 ++- dynamicalgorithmselection/agents/ppo_utils.py | 110 +++++++++++++----- dynamicalgorithmselection/experiments/core.py | 36 +++--- 5 files changed, 137 insertions(+), 83 deletions(-) diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 7cfc97a..0e5dfb9 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -87,6 +87,8 @@ def _update_ah_history( self.ah_vectors[alg_idx, 1] * H + sv_worst_current ) / (H + 1) + # Here I am computing current average + self.alg_usage_counts[alg_idx] += 1 def _save_context(self, optimizer, alg_name): @@ -167,25 +169,20 @@ def optimize(self, fitness_function=None, args=None): population_x, population_y = self.initialize() self.n_function_evaluations = INITIAL_POPSIZE - best_idx = np.argmin(population_y) - best_y_global = population_y[best_idx] - best_x_global = population_x[best_idx].copy() - - self.best_so_far_y = best_y_global - self.best_so_far_x = best_x_global - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) - self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 + self.initial_cost = ( + self.best_so_far_y if abs(self.best_so_far_y) > 1e-8 else 1.0 + ) self.ah_vectors.fill(0.0) self.alg_usage_counts.fill(0.0) self.context_memory = 
{name: {} for name in self.alg_names} self.context_memory["Common"] = {} - + cost_new, cost_old = float(np.min(population_y)), float(np.min(population_y)) trajectory = [] - + clip_eps = self.options.get("ppo_eps", 0.3) while self.n_function_evaluations < self.max_function_evaluations: state = self.get_state(population_x, population_y) @@ -203,7 +200,7 @@ def optimize(self, fitness_function=None, args=None): x_best_old = population_x[np.argmin(population_y)].copy() x_worst_old = population_x[np.argmax(population_y)].copy() - cost_old = np.copy(np.min(population_y)) + cost_old = float(cost_new) target_fes = min( self.n_function_evaluations + self.schedule_interval, @@ -237,13 +234,18 @@ def optimize(self, fitness_function=None, args=None): x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy() - cost_new: float = np.min(population_y) + cost_new: float = self.best_so_far_y self._update_ah_history( action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new ) - adc = (cost_old - cost_new) / self.initial_cost + # Update Agent Best State and History + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + adc = (cost_old - self.best_so_far_y) / self.initial_cost if self.run: self.run.log({"adc": adc}) @@ -260,23 +262,15 @@ def optimize(self, fitness_function=None, args=None): } ) - best_y_global = min(best_y_global, cost_new) - - # Update Agent Best State and History - if cost_new < self.best_so_far_y: - self.best_so_far_y = cost_new - self.best_so_far_x = x_best_new - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) - fes_end = self.n_function_evaluations - speed_factor = self.max_function_evaluations / fes_end + speed_factor = self.max_function_evaluations / self.n_function_evaluations for step in trajectory: - final_reward = 
step["adc"] * speed_factor + final_reward = max(step["adc"] * speed_factor, 0) self.rewards.append(final_reward) la_state, ah_state = step["state"] @@ -300,7 +294,7 @@ def optimize(self, fitness_function=None, args=None): self.buffer, epochs=K, minibatch_size=32, - clip_eps=0.2, + clip_eps=clip_eps, value_coef=0.5, entropy_coef=0.01, ) @@ -336,7 +330,7 @@ def ppo_update( buffer, epochs=4, minibatch_size=None, - clip_eps=0.2, + clip_eps=0.3, value_coef=0.5, entropy_coef=0.01, ): @@ -405,5 +399,5 @@ def ppo_update( self.optimizer.zero_grad() loss.backward() - torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) + torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.1) self.optimizer.step() diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py index 71a55b5..fe08eca 100644 --- a/dynamicalgorithmselection/agents/RLDAS_random_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -110,13 +110,6 @@ def optimize(self, fitness_function=None, args=None): population_x, population_y = self.initialize() self.n_function_evaluations = INITIAL_POPSIZE - best_idx = np.argmin(population_y) - best_y_global = population_y[best_idx] - best_x_global = population_x[best_idx].copy() - - self.best_so_far_y = best_y_global - self.best_so_far_x = best_x_global - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) @@ -179,8 +172,6 @@ def optimize(self, fitness_function=None, args=None): action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new ) - best_y_global = min(best_y_global, cost_new) - if cost_new < self.best_so_far_y: self.best_so_far_y = cost_new self.best_so_far_x = x_best_new diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 856dc9e..3ccb6c8 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -373,6 +373,21 @@ 
def normalize(self, state, update=True): return np.clip(normalized_state, -5.0, 5.0) +def negative_slope_coefficient(group_cost, sample_cost): # [j] + gs = sample_cost.shape[0] + m = 10 + gs -= gs % m # to be divisible + if gs < m: # not enough costs for m dividing + return 0 + sorted_cost = np.array(sorted(list(zip(group_cost[:gs], sample_cost[:gs])))) + sorted_group = sorted_cost[:, 0].reshape(m, -1) + sorted_sample = sorted_cost[:, 1].reshape(m, -1) + Ms = np.mean(sorted_group, -1) + Ns = np.mean(sorted_sample, -1) + nsc = np.minimum((Ns[1:] - Ns[:-1]) / (Ms[1:] - Ms[:-1] + 1e-8), 0) + return np.sum(nsc) + + def get_la_features(agent, pop_x, pop_y): """ Extracts 9 Landscape Analysis features based on the logic in Population.py. @@ -424,7 +439,7 @@ def get_la_features(agent, pop_x, pop_y): random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape) # Evaluate the random walk samples - sample_costs = [agent.fitness_function(i) for i in random_walk_samples] + sample_costs = np.array([agent.fitness_function(i) for i in random_walk_samples]) agent.n_function_evaluations += n # Increment evaluations by population size # Calculate differences between the walk and the current population @@ -432,7 +447,7 @@ def get_la_features(agent, pop_x, pop_y): # --- Feature 5: Negative Slope Coefficient (nsc) --- # Proportion of steps that resulted in an improvement - f5_nsc = np.sum(diffs < 0) / n + f5_nsc = negative_slope_coefficient(pop_y, sample_cost=sample_costs) # --- Feature 6: Average Neutral Ratio (anr) --- # Proportion of steps that resulted in practically zero change diff --git a/dynamicalgorithmselection/agents/ppo_utils.py b/dynamicalgorithmselection/agents/ppo_utils.py index cf1f19f..d2156f0 100644 --- a/dynamicalgorithmselection/agents/ppo_utils.py +++ b/dynamicalgorithmselection/agents/ppo_utils.py @@ -142,48 +142,98 @@ def forward(self, advantage, log_prob): class RLDASNetwork(nn.Module): - def __init__(self, d_dim, num_algorithms, la_dim=9): + 
def __init__(self, d_dim, num_algorithms): super(RLDASNetwork, self).__init__() self.L = num_algorithms self.D = d_dim - self.la_dim = la_dim - self.ah_input_flat_dim = self.L * 2 * self.D + self.actor = RLDASActor(d_dim, num_algorithms, DEVICE) + self.critic = RLDASCritic(d_dim, num_algorithms, DEVICE) - self.ah_embed = nn.Sequential( - nn.Linear(self.ah_input_flat_dim, 64), - nn.ReLU(), - nn.Linear(64, 2 * self.L), # Output size aligned with paper description - nn.ReLU(), - ) - self.fusion_input_dim = self.la_dim + (2 * self.L) + def forward(self, la_state, ah_state): + return self.actor(la_state, ah_state), self.critic(la_state, ah_state) - self.dv_layer = nn.Sequential(nn.Linear(self.fusion_input_dim, 64), nn.Tanh()) - self.actor_head = nn.Sequential( - nn.Linear(64, 16), nn.Tanh(), nn.Linear(16, self.L), nn.Softmax(dim=-1) - ) +class RLDASActor(nn.Module): + def __init__(self, dim, optimizer_num, device): + super().__init__() + self.device = device + self.optimizer_num = optimizer_num + self.embedders = [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] - self.critic_head = nn.Sequential( - nn.Linear(64, 64), - nn.ReLU(), - nn.Linear(64, 1), # Scalar Value - ) + self.embedder_final = nn.Sequential( + nn.Linear(9 + optimizer_num * 2, 64), + nn.Tanh(), + ).to(device) + self.model = nn.Sequential( + nn.Linear(64, 16), + nn.Tanh(), + nn.Linear(16, optimizer_num), + nn.Softmax(dim=-1), + ).to(device) def forward(self, la_state, ah_state): - if ah_state.dim() > 2: - batch_size = ah_state.size(0) - ah_flat = ah_state.view(batch_size, -1) - else: - ah_flat = ah_state + flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2) + + embedded_ah = [ + embedder(flattened_ah_state[:, i, :]) + for i, embedder in enumerate(self.embedders) + ] - v_ah = self.ah_embed(ah_flat) + embedded_ah = torch.cat(embedded_ah, dim=-1) + batch_size = ah_state.shape[0] + x = 
torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1) + x = self.embedder_final(x) + probs = self.model(x) - combined = torch.cat([la_state, v_ah], dim=1) + return probs - dv = self.dv_layer(combined) - probs = self.actor_head(dv) - value = self.critic_head(dv) +class RLDASCritic(nn.Module): + def __init__(self, dim, optimizer_num, device): + super().__init__() + self.device = device + self.embedders = [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] + self.embedder_final = nn.Sequential( + nn.Linear(9 + optimizer_num * 2, 64), + nn.Tanh(), + ).to(device) + self.model = nn.Sequential( + nn.Linear(64, 16), + nn.Tanh(), + nn.Linear(16, 1), + ).to(device) + + def forward(self, la_state, ah_state): + flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2) + embedded_ah = [ + embedder(flattened_ah_state[:, i, :]) + for i, embedder in enumerate(self.embedders) + ] + embedded_ah = torch.cat(embedded_ah, dim=-1) + batch_size = ah_state.shape[0] + feature = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1) + feature = self.embedder_final(feature) + val = self.model(feature.view(batch_size, -1)) - return probs, value + return val diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 6ab5405..368814b 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -45,19 +45,23 @@ def run_training( ): agent_state: dict[str, Any] = {} n_epochs = options["n_epochs"] - for problem_id in tqdm( - np.random.permutation(problem_ids).tolist() * n_epochs, smoothing=0.0 - ): - problem_instance = problems_suite.get_problem(problem_id) - max_fe = evaluations_multiplier * problem_instance.dimension - options["max_function_evaluations"] = max_fe - options.update(agent_state) - options["train_mode"] = True - options["verbose"] = False - results, 
agent_state = coco_bbob_single_function( - optimizer, problem_instance, options - ) - options["state_normalizer"] = agent_state["state_normalizer"] - options["reward_normalizer"] = agent_state["reward_normalizer"] - options["buffer"] = agent_state["buffer"] - problem_instance.free() + options["clip_eps"] = 0.3 + epsilon_decay = 0.99 + for epoch in range(n_epochs): + for problem_id in tqdm( + np.random.permutation(problem_ids).tolist(), smoothing=0.0 + ): + problem_instance = problems_suite.get_problem(problem_id) + max_fe = evaluations_multiplier * problem_instance.dimension + options["max_function_evaluations"] = max_fe + options.update(agent_state) + options["train_mode"] = True + options["verbose"] = False + results, agent_state = coco_bbob_single_function( + optimizer, problem_instance, options + ) + options["state_normalizer"] = agent_state["state_normalizer"] + options["reward_normalizer"] = agent_state["reward_normalizer"] + options["buffer"] = agent_state["buffer"] + problem_instance.free() + options["clip_eps"] *= epsilon_decay