From de90dc9bd3b2ff0d929531361fb0db22ee2ee502 Mon Sep 17 00:00:00 2001 From: wniec Date: Fri, 27 Feb 2026 19:33:13 +0100 Subject: [PATCH 1/3] minor fixes in runners --- README.md | 2 +- .../agents/RLDAS_agent.py | 1 - .../agents/agent_utils.py | 4 -- reward_study.slurm | 67 +++++++++++++++++++ runner.slurm | 6 +- 5 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 reward_study.slurm diff --git a/README.md b/README.md index 5469d65..323a285 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ uv run das [options] | `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. | | `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. | | `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. | -| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. | +| `-D`, `--dimensionality` | `list[int]` | `[2, 3, 5, 10, 20, 40]` | Dimensionality of problems. | | `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. | | `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. 
| diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index f02bc47..7cfc97a 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -272,7 +272,6 @@ def optimize(self, fitness_function=None, args=None): self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) - print(self._n_generations) fes_end = self.n_function_evaluations speed_factor = self.max_function_evaluations / fes_end diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 0bb2f23..019eefa 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np MAX_DIM = 40 @@ -14,7 +12,6 @@ def get_runtime_stats( """ :param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved :param function_evaluations: max number of function evaluations during run. - :param checkpoints: list of checkpoints by their n_function_evaluations :return: dictionary of selected run statistics, ready to dump """ area_under_optimization_curve = 0.0 @@ -43,7 +40,6 @@ def get_extreme_stats( """ :param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm :param function_evaluations: max number of function evaluations during run. 
- :param checkpoints: list of checkpoints by their n_function_evaluations :return: dictionary of selected run statistics, ready to dump """ all_improvements = [] diff --git a/reward_study.slurm b/reward_study.slurm new file mode 100644 index 0000000..ff7ac0f --- /dev/null +++ b/reward_study.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. 
Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/runner.slurm b/runner.slurm index d0bbca4..7410b16 100644 --- a/runner.slurm +++ b/runner.slurm @@ -63,7 +63,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient # 5. Dimension-specific RL-DAS-random (Indices 16-19) @@ -79,14 +79,14 @@ elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient # 7. 
Multidimensional CV-LOPO (Index 21) elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient # 8. Global Random Agent (Index 22) elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then From 1c770aaf620df0476c6529e68eea907b7716bb88 Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 3 Mar 2026 22:36:49 +0100 Subject: [PATCH 2/3] new experiment variants --- CDB_study.slurm | 4 +-- portfolio_study.slurm | 21 +++++------ runner.slurm | 54 ++++++++++++++-------------- single_algorithm_CDB_study.slurm | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 43 deletions(-) create mode 100644 single_algorithm_CDB_study.slurm diff --git a/CDB_study.slurm b/CDB_study.slurm index ff7ac0f..628326b 100644 --- a/CDB_study.slurm +++ b/CDB_study.slurm @@ -37,7 +37,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -47,7 +47,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 
--agent policy-gradient diff --git a/portfolio_study.slurm b/portfolio_study.slurm index 4282edd..d320d2f 100644 --- a/portfolio_study.slurm +++ b/portfolio_study.slurm @@ -9,17 +9,12 @@ #SBATCH --partition=plgrid-gpu-a100 #SBATCH --array=0-9 # 10 tasks total -CDB_VAL=${1:-1.5} +REWARD_OPTION=${1:-1} -if [ "$#" -gt 0 ]; then - shift -fi +CDB_VAL=1.5 + +PORTFOLIO=('MADDE' 'CMAES' 'SPSO') -if [ "$#" -eq 0 ]; then - PORTFOLIO=('MADDE' 'CMAES' 'SPSO') -else - PORTFOLIO=("$@") -fi PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") @@ -37,7 +32,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_REWARD_${REWARD_OPTION}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -47,7 +42,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_REWARD_${REWARD_OPTION}_DIM${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -55,13 +50,13 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_REWARD_${REWARD_OPTION} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent
policy-gradient # 4. Multidimensional CV-LOPO (Index 9) elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_REWARD_${REWARD_OPTION} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient fi \ No newline at end of file diff --git a/runner.slurm b/runner.slurm index 7410b16..9aeb115 100644 --- a/runner.slurm +++ b/runner.slurm @@ -9,15 +9,13 @@ #SBATCH --partition=plgrid-gpu-a100 #SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs +# 1st argument: CDB_VAL (Default: 1.5) CDB_VAL=${1:-1.5} -if [ "$#" -gt 0 ]; then - shift -fi - -# Store the remaining arguments as an array called PORTFOLIO. -# If no additional arguments were provided, fall back to your default. +# 2nd argument: SEED (Default: 42) +SEED=${2:-42} +# Fixed PORTFOLIO variable PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') # CONFIGURATION @@ -31,72 +29,72 @@ DIMS=(2 3 5 10) # 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" - DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" # 2.
Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then MODE="CV-LOIO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" # 3. Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS -S "$SEED" # 4.
Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_DIM${DIM}_SEED${SEED} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -S "$SEED" # 5. Dimension-specific RL-DAS-random (Indices 16-19) elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent RL-DAS-random --dimensionality $DIM + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_DIM${DIM}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --agent RL-DAS-random --dimensionality $DIM -S "$SEED" # 6. Multidimensional CV-LOIO (Index 20) elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" # 7.
Multidimensional CV-LOPO (Index 21) elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --mode $MODE --cdb $CDB_VAL --agent policy-gradient -S "$SEED" # 8. Global Random Agent (Index 22) elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then echo "Running Mode: Global Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL}_SEED${SEED} \ + -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -S "$SEED" # 9. Global Baselines (Index 23) elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then echo "Running Mode: Baselines" python3 dynamicalgorithmselection/main.py BASELINES \ - -p "${PORTFOLIO[@]}" --mode baselines + -p "${PORTFOLIO[@]}" --mode baselines -S "$SEED" fi \ No newline at end of file diff --git a/single_algorithm_CDB_study.slurm b/single_algorithm_CDB_study.slurm new file mode 100644 index 0000000..c7c0448 --- /dev/null +++ b/single_algorithm_CDB_study.slurm @@ -0,0 +1,62 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +REWARD_OPTION=${1:-1} + +CDB_VAL=1.5 + +PORTFOLIO=('MADDE') + +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array 
of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_DIM${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_DIM${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. 
Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file From add23f995b6ad573e5ecf7d819047ff1321f6b2c Mon Sep 17 00:00:00 2001 From: wniec Date: Thu, 5 Mar 2026 00:41:03 +0100 Subject: [PATCH 3/3] fix RL-DAS implementation --- .../agents/RLDAS_agent.py | 46 ++++---- .../agents/RLDAS_random_agent.py | 9 -- .../agents/agent_state.py | 19 ++- dynamicalgorithmselection/agents/ppo_utils.py | 110 +++++++++++++----- dynamicalgorithmselection/experiments/core.py | 36 +++--- 5 files changed, 137 insertions(+), 83 deletions(-) diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 7cfc97a..0e5dfb9 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -87,6 +87,8 @@ def _update_ah_history( self.ah_vectors[alg_idx, 1] * H + sv_worst_current ) / (H + 1) + # Here I am computing current average + self.alg_usage_counts[alg_idx] += 1 def _save_context(self, optimizer, alg_name): @@ -167,25 +169,20 @@ def optimize(self, fitness_function=None, args=None): population_x, population_y = self.initialize() self.n_function_evaluations = INITIAL_POPSIZE - best_idx = np.argmin(population_y) - best_y_global = population_y[best_idx] - best_x_global = population_x[best_idx].copy() - - self.best_so_far_y = best_y_global - self.best_so_far_x = best_x_global - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) - self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 + self.initial_cost = ( + self.best_so_far_y if abs(self.best_so_far_y) > 1e-8 else 1.0 + ) self.ah_vectors.fill(0.0) self.alg_usage_counts.fill(0.0) self.context_memory = 
{name: {} for name in self.alg_names} self.context_memory["Common"] = {} - + cost_new, cost_old = float(np.min(population_y)), float(np.min(population_y)) trajectory = [] - + clip_eps = self.options.get("ppo_eps", 0.3) while self.n_function_evaluations < self.max_function_evaluations: state = self.get_state(population_x, population_y) @@ -203,7 +200,7 @@ def optimize(self, fitness_function=None, args=None): x_best_old = population_x[np.argmin(population_y)].copy() x_worst_old = population_x[np.argmax(population_y)].copy() - cost_old = np.copy(np.min(population_y)) + cost_old = float(cost_new) target_fes = min( self.n_function_evaluations + self.schedule_interval, @@ -237,13 +234,18 @@ def optimize(self, fitness_function=None, args=None): x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy() - cost_new: float = np.min(population_y) + cost_new: float = self.best_so_far_y self._update_ah_history( action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new ) - adc = (cost_old - cost_new) / self.initial_cost + # Update Agent Best State and History + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + adc = (cost_old - self.best_so_far_y) / self.initial_cost if self.run: self.run.log({"adc": adc}) @@ -260,23 +262,15 @@ def optimize(self, fitness_function=None, args=None): } ) - best_y_global = min(best_y_global, cost_new) - - # Update Agent Best State and History - if cost_new < self.best_so_far_y: - self.best_so_far_y = cost_new - self.best_so_far_x = x_best_new - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) - fes_end = self.n_function_evaluations - speed_factor = self.max_function_evaluations / fes_end + speed_factor = self.max_function_evaluations / self.n_function_evaluations for step in trajectory: - final_reward = 
step["adc"] * speed_factor + final_reward = max(step["adc"] * speed_factor, 0) self.rewards.append(final_reward) la_state, ah_state = step["state"] @@ -300,7 +294,7 @@ def optimize(self, fitness_function=None, args=None): self.buffer, epochs=K, minibatch_size=32, - clip_eps=0.2, + clip_eps=clip_eps, value_coef=0.5, entropy_coef=0.01, ) @@ -336,7 +330,7 @@ def ppo_update( buffer, epochs=4, minibatch_size=None, - clip_eps=0.2, + clip_eps=0.3, value_coef=0.5, entropy_coef=0.01, ): @@ -405,5 +399,5 @@ def ppo_update( self.optimizer.zero_grad() loss.backward() - torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) + torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.1) self.optimizer.step() diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py index 71a55b5..fe08eca 100644 --- a/dynamicalgorithmselection/agents/RLDAS_random_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -110,13 +110,6 @@ def optimize(self, fitness_function=None, args=None): population_x, population_y = self.initialize() self.n_function_evaluations = INITIAL_POPSIZE - best_idx = np.argmin(population_y) - best_y_global = population_y[best_idx] - best_x_global = population_x[best_idx].copy() - - self.best_so_far_y = best_y_global - self.best_so_far_x = best_x_global - self.history.append(self.best_so_far_y) fitness.append(float(self.best_so_far_y)) @@ -179,8 +172,6 @@ def optimize(self, fitness_function=None, args=None): action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new ) - best_y_global = min(best_y_global, cost_new) - if cost_new < self.best_so_far_y: self.best_so_far_y = cost_new self.best_so_far_x = x_best_new diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 856dc9e..3ccb6c8 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -373,6 +373,21 @@ 
def normalize(self, state, update=True): return np.clip(normalized_state, -5.0, 5.0) +def negative_slope_coefficient(group_cost, sample_cost): # [j] + gs = sample_cost.shape[0] + m = 10 + gs -= gs % m # to be divisible + if gs < m: # not enough costs for m dividing + return 0 + sorted_cost = np.array(sorted(list(zip(group_cost[:gs], sample_cost[:gs])))) + sorted_group = sorted_cost[:, 0].reshape(m, -1) + sorted_sample = sorted_cost[:, 1].reshape(m, -1) + Ms = np.mean(sorted_group, -1) + Ns = np.mean(sorted_sample, -1) + nsc = np.minimum((Ns[1:] - Ns[:-1]) / (Ms[1:] - Ms[:-1] + 1e-8), 0) + return np.sum(nsc) + + def get_la_features(agent, pop_x, pop_y): """ Extracts 9 Landscape Analysis features based on the logic in Population.py. @@ -424,7 +439,7 @@ def get_la_features(agent, pop_x, pop_y): random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape) # Evaluate the random walk samples - sample_costs = [agent.fitness_function(i) for i in random_walk_samples] + sample_costs = np.array([agent.fitness_function(i) for i in random_walk_samples]) agent.n_function_evaluations += n # Increment evaluations by population size # Calculate differences between the walk and the current population @@ -432,7 +447,7 @@ def get_la_features(agent, pop_x, pop_y): # --- Feature 5: Negative Slope Coefficient (nsc) --- # Proportion of steps that resulted in an improvement - f5_nsc = np.sum(diffs < 0) / n + f5_nsc = negative_slope_coefficient(pop_y, sample_cost=sample_costs) # --- Feature 6: Average Neutral Ratio (anr) --- # Proportion of steps that resulted in practically zero change diff --git a/dynamicalgorithmselection/agents/ppo_utils.py b/dynamicalgorithmselection/agents/ppo_utils.py index cf1f19f..d2156f0 100644 --- a/dynamicalgorithmselection/agents/ppo_utils.py +++ b/dynamicalgorithmselection/agents/ppo_utils.py @@ -142,48 +142,98 @@ def forward(self, advantage, log_prob): class RLDASNetwork(nn.Module): - def __init__(self, d_dim, num_algorithms, la_dim=9): + 
def __init__(self, d_dim, num_algorithms): super(RLDASNetwork, self).__init__() self.L = num_algorithms self.D = d_dim - self.la_dim = la_dim - self.ah_input_flat_dim = self.L * 2 * self.D + self.actor = RLDASActor(d_dim, num_algorithms, DEVICE) + self.critic = RLDASCritic(d_dim, num_algorithms, DEVICE) - self.ah_embed = nn.Sequential( - nn.Linear(self.ah_input_flat_dim, 64), - nn.ReLU(), - nn.Linear(64, 2 * self.L), # Output size aligned with paper description - nn.ReLU(), - ) - self.fusion_input_dim = self.la_dim + (2 * self.L) + def forward(self, la_state, ah_state): + return self.actor(la_state, ah_state), self.critic(la_state, ah_state) - self.dv_layer = nn.Sequential(nn.Linear(self.fusion_input_dim, 64), nn.Tanh()) - self.actor_head = nn.Sequential( - nn.Linear(64, 16), nn.Tanh(), nn.Linear(16, self.L), nn.Softmax(dim=-1) - ) +class RLDASActor(nn.Module): + def __init__(self, dim, optimizer_num, device): + super().__init__() + self.device = device + self.optimizer_num = optimizer_num + self.embedders = [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] - self.critic_head = nn.Sequential( - nn.Linear(64, 64), - nn.ReLU(), - nn.Linear(64, 1), # Scalar Value - ) + self.embedder_final = nn.Sequential( + nn.Linear(9 + optimizer_num * 2, 64), + nn.Tanh(), + ).to(device) + self.model = nn.Sequential( + nn.Linear(64, 16), + nn.Tanh(), + nn.Linear(16, optimizer_num), + nn.Softmax(dim=-1), + ).to(device) def forward(self, la_state, ah_state): - if ah_state.dim() > 2: - batch_size = ah_state.size(0) - ah_flat = ah_state.view(batch_size, -1) - else: - ah_flat = ah_state + flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2) + + embedded_ah = [ + embedder(flattened_ah_state[:, i, :]) + for i, embedder in enumerate(self.embedders) + ] - v_ah = self.ah_embed(ah_flat) + embedded_ah = torch.cat(embedded_ah, dim=-1) + batch_size = ah_state.shape[0] + x = 
torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1) + x = self.embedder_final(x) + probs = self.model(x) - combined = torch.cat([la_state, v_ah], dim=1) + return probs - dv = self.dv_layer(combined) - probs = self.actor_head(dv) - value = self.critic_head(dv) +class RLDASCritic(nn.Module): + def __init__(self, dim, optimizer_num, device): + super().__init__() + self.device = device + self.embedders = [ + ( + nn.Sequential( + nn.Linear(dim, 64), + nn.ReLU(), + nn.Linear(64, 1), + nn.ReLU(), + ) + ).to(device) + for _ in range(2 * optimizer_num) + ] + self.embedder_final = nn.Sequential( + nn.Linear(9 + optimizer_num * 2, 64), + nn.Tanh(), + ).to(device) + self.model = nn.Sequential( + nn.Linear(64, 16), + nn.Tanh(), + nn.Linear(16, 1), + ).to(device) + + def forward(self, la_state, ah_state): + flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2) + embedded_ah = [ + embedder(flattened_ah_state[:, i, :]) + for i, embedder in enumerate(self.embedders) + ] + embedded_ah = torch.cat(embedded_ah, dim=-1) + batch_size = ah_state.shape[0] + feature = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1) + feature = self.embedder_final(feature) + val = self.model(feature.view(batch_size, -1)) - return probs, value + return val diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 6ab5405..368814b 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -45,19 +45,23 @@ def run_training( ): agent_state: dict[str, Any] = {} n_epochs = options["n_epochs"] - for problem_id in tqdm( - np.random.permutation(problem_ids).tolist() * n_epochs, smoothing=0.0 - ): - problem_instance = problems_suite.get_problem(problem_id) - max_fe = evaluations_multiplier * problem_instance.dimension - options["max_function_evaluations"] = max_fe - options.update(agent_state) - options["train_mode"] = True - options["verbose"] = False - results, 
agent_state = coco_bbob_single_function( - optimizer, problem_instance, options - ) - options["state_normalizer"] = agent_state["state_normalizer"] - options["reward_normalizer"] = agent_state["reward_normalizer"] - options["buffer"] = agent_state["buffer"] - problem_instance.free() + options["clip_eps"] = 0.3 + epsilon_decay = 0.99 + for epoch in range(n_epochs): + for problem_id in tqdm( + np.random.permutation(problem_ids).tolist(), smoothing=0.0 + ): + problem_instance = problems_suite.get_problem(problem_id) + max_fe = evaluations_multiplier * problem_instance.dimension + options["max_function_evaluations"] = max_fe + options.update(agent_state) + options["train_mode"] = True + options["verbose"] = False + results, agent_state = coco_bbob_single_function( + optimizer, problem_instance, options + ) + options["state_normalizer"] = agent_state["state_normalizer"] + options["reward_normalizer"] = agent_state["reward_normalizer"] + options["buffer"] = agent_state["buffer"] + problem_instance.free() + options["clip_eps"] *= epsilon_decay