Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CDB_study.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient

Expand All @@ -47,7 +47,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ uv run das <name> [options]
| `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. |
| `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. |
| `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. |
| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. |
| `-D`, `--dimensionality` | `list[int]` | `[2, 3, 5, 10, 20, 40]` | Dimensionality of problems. |
| `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. |
| `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. |

Expand Down
47 changes: 20 additions & 27 deletions dynamicalgorithmselection/agents/RLDAS_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def _update_ah_history(
self.ah_vectors[alg_idx, 1] * H + sv_worst_current
) / (H + 1)

            # Incremental mean: fold the newest worst-shift vector into the
            # running average over the H+1 samples seen so far.

self.alg_usage_counts[alg_idx] += 1

def _save_context(self, optimizer, alg_name):
Expand Down Expand Up @@ -167,25 +169,20 @@ def optimize(self, fitness_function=None, args=None):
population_x, population_y = self.initialize()
self.n_function_evaluations = INITIAL_POPSIZE

best_idx = np.argmin(population_y)
best_y_global = population_y[best_idx]
best_x_global = population_x[best_idx].copy()

self.best_so_far_y = best_y_global
self.best_so_far_x = best_x_global

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0
self.initial_cost = (
self.best_so_far_y if abs(self.best_so_far_y) > 1e-8 else 1.0
)

self.ah_vectors.fill(0.0)
self.alg_usage_counts.fill(0.0)
self.context_memory = {name: {} for name in self.alg_names}
self.context_memory["Common"] = {}

cost_new, cost_old = float(np.min(population_y)), float(np.min(population_y))
trajectory = []

clip_eps = self.options.get("ppo_eps", 0.3)
while self.n_function_evaluations < self.max_function_evaluations:
state = self.get_state(population_x, population_y)

Expand All @@ -203,7 +200,7 @@ def optimize(self, fitness_function=None, args=None):

x_best_old = population_x[np.argmin(population_y)].copy()
x_worst_old = population_x[np.argmax(population_y)].copy()
cost_old = np.copy(np.min(population_y))
cost_old = float(cost_new)

target_fes = min(
self.n_function_evaluations + self.schedule_interval,
Expand Down Expand Up @@ -237,13 +234,18 @@ def optimize(self, fitness_function=None, args=None):

x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy()
x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy()
cost_new: float = np.min(population_y)
cost_new: float = self.best_so_far_y

self._update_ah_history(
action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new
)

adc = (cost_old - cost_new) / self.initial_cost
# Update Agent Best State and History
if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new

adc = (cost_old - self.best_so_far_y) / self.initial_cost
if self.run:
self.run.log({"adc": adc})

Expand All @@ -260,24 +262,15 @@ def optimize(self, fitness_function=None, args=None):
}
)

best_y_global = min(best_y_global, cost_new)

# Update Agent Best State and History
if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

self._n_generations += 1
self._print_verbose_info(fitness, self.best_so_far_y)
print(self._n_generations)
fes_end = self.n_function_evaluations
speed_factor = self.max_function_evaluations / fes_end
speed_factor = self.max_function_evaluations / self.n_function_evaluations

for step in trajectory:
final_reward = step["adc"] * speed_factor
final_reward = max(step["adc"] * speed_factor, 0)
self.rewards.append(final_reward)
la_state, ah_state = step["state"]

Expand All @@ -301,7 +294,7 @@ def optimize(self, fitness_function=None, args=None):
self.buffer,
epochs=K,
minibatch_size=32,
clip_eps=0.2,
clip_eps=clip_eps,
value_coef=0.5,
entropy_coef=0.01,
)
Expand Down Expand Up @@ -337,7 +330,7 @@ def ppo_update(
buffer,
epochs=4,
minibatch_size=None,
clip_eps=0.2,
clip_eps=0.3,
value_coef=0.5,
entropy_coef=0.01,
):
Expand Down Expand Up @@ -406,5 +399,5 @@ def ppo_update(

self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5)
torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.1)
self.optimizer.step()
9 changes: 0 additions & 9 deletions dynamicalgorithmselection/agents/RLDAS_random_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,6 @@ def optimize(self, fitness_function=None, args=None):
population_x, population_y = self.initialize()
self.n_function_evaluations = INITIAL_POPSIZE

best_idx = np.argmin(population_y)
best_y_global = population_y[best_idx]
best_x_global = population_x[best_idx].copy()

self.best_so_far_y = best_y_global
self.best_so_far_x = best_x_global

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

Expand Down Expand Up @@ -179,8 +172,6 @@ def optimize(self, fitness_function=None, args=None):
action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new
)

best_y_global = min(best_y_global, cost_new)

if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new
Expand Down
19 changes: 17 additions & 2 deletions dynamicalgorithmselection/agents/agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,21 @@ def normalize(self, state, update=True):
return np.clip(normalized_state, -5.0, 5.0)


def negative_slope_coefficient(group_cost, sample_cost):
    """Negative Slope Coefficient (NSC) landscape-analysis feature.

    Pairs each population cost with the cost of its random-walk sample,
    sorts the pairs by population cost, splits them into ``m`` equal-size
    bins, and sums the slopes between consecutive bin means, keeping only
    the negative ones. A value near 0 suggests a well-behaved landscape;
    strongly negative values indicate deceptiveness.

    :param group_cost: costs of the current population, shape (n,) (array-like).
    :param sample_cost: costs of the perturbed random-walk samples, shape (n,).
    :return: float, sum of the negative inter-bin slopes (<= 0.0).
    """
    group_cost = np.asarray(group_cost)
    sample_cost = np.asarray(sample_cost)
    m = 10  # number of bins
    gs = sample_cost.shape[0]
    gs -= gs % m  # truncate so the pairs divide evenly into m bins
    if gs < m:  # not enough samples to put one pair in each bin
        return 0.0
    # Sort (group, sample) pairs by the population cost.
    pairs = np.array(sorted(zip(group_cost[:gs], sample_cost[:gs])))
    bin_group = pairs[:, 0].reshape(m, -1)
    bin_sample = pairs[:, 1].reshape(m, -1)
    Ms = np.mean(bin_group, -1)   # mean population cost per bin
    Ns = np.mean(bin_sample, -1)  # mean sample cost per bin
    # Slope between consecutive bins, clipped to <= 0; +1e-8 avoids div-by-zero.
    nsc = np.minimum((Ns[1:] - Ns[:-1]) / (Ms[1:] - Ms[:-1] + 1e-8), 0)
    return float(np.sum(nsc))


def get_la_features(agent, pop_x, pop_y):
"""
Extracts 9 Landscape Analysis features based on the logic in Population.py.
Expand Down Expand Up @@ -424,15 +439,15 @@ def get_la_features(agent, pop_x, pop_y):
random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape)

# Evaluate the random walk samples
sample_costs = [agent.fitness_function(i) for i in random_walk_samples]
sample_costs = np.array([agent.fitness_function(i) for i in random_walk_samples])
agent.n_function_evaluations += n # Increment evaluations by population size

# Calculate differences between the walk and the current population
diffs = np.array(sample_costs) - pop_y

# --- Feature 5: Negative Slope Coefficient (nsc) ---
# Proportion of steps that resulted in an improvement
f5_nsc = np.sum(diffs < 0) / n
f5_nsc = negative_slope_coefficient(pop_y, sample_cost=sample_costs)

# --- Feature 6: Average Neutral Ratio (anr) ---
# Proportion of steps that resulted in practically zero change
Expand Down
4 changes: 0 additions & 4 deletions dynamicalgorithmselection/agents/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Optional

import numpy as np

MAX_DIM = 40
Expand All @@ -14,7 +12,6 @@ def get_runtime_stats(
"""
:param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved
:param function_evaluations: max number of function evaluations during run.
:param checkpoints: list of checkpoints by their n_function_evaluations
:return: dictionary of selected run statistics, ready to dump
"""
area_under_optimization_curve = 0.0
Expand Down Expand Up @@ -43,7 +40,6 @@ def get_extreme_stats(
"""
:param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm
:param function_evaluations: max number of function evaluations during run.
:param checkpoints: list of checkpoints by their n_function_evaluations
:return: dictionary of selected run statistics, ready to dump
"""
all_improvements = []
Expand Down
110 changes: 80 additions & 30 deletions dynamicalgorithmselection/agents/ppo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,48 +142,98 @@ def forward(self, advantage, log_prob):


class RLDASNetwork(nn.Module):
    """Actor-critic wrapper for the RL-DAS agent.

    Thin container combining a policy head (``RLDASActor``) and a value
    head (``RLDASCritic``); both consume the landscape-analysis (LA)
    state and the algorithm-history (AH) state.
    """

    def __init__(self, d_dim, num_algorithms):
        """
        :param d_dim: problem dimensionality (per-vector AH feature size).
        :param num_algorithms: number of optimizers in the portfolio.
        """
        super(RLDASNetwork, self).__init__()
        self.L = num_algorithms  # portfolio size
        self.D = d_dim           # problem dimensionality

        self.actor = RLDASActor(d_dim, num_algorithms, DEVICE)
        self.critic = RLDASCritic(d_dim, num_algorithms, DEVICE)

    def forward(self, la_state, ah_state):
        """Return (action probabilities, state value) for the given states."""
        return self.actor(la_state, ah_state), self.critic(la_state, ah_state)
class RLDASActor(nn.Module):
    """Policy network: maps (LA features, AH history) to a probability
    distribution over the optimizer portfolio.

    Expected inputs:
      - la_state: (batch, 9) landscape-analysis features (the final layer
        hard-codes 9 LA features).
      - ah_state: (batch, optimizer_num, 2, dim) per-optimizer best/worst
        shift-vector history — assumed layout; TODO confirm against the
        agent's ``ah_vectors``.
    """

    def __init__(self, dim, optimizer_num, device):
        super().__init__()
        self.device = device
        self.optimizer_num = optimizer_num
        # Use nn.ModuleList (not a plain Python list) so each embedder is
        # registered as a submodule: its parameters then appear in
        # .parameters()/state_dict() and are actually trained.
        self.embedders = nn.ModuleList(
            nn.Sequential(
                nn.Linear(dim, 64),
                nn.ReLU(),
                nn.Linear(64, 1),
                nn.ReLU(),
            ).to(device)
            for _ in range(2 * optimizer_num)
        )
        # Fuses the 9 LA features with one scalar per AH vector.
        self.embedder_final = nn.Sequential(
            nn.Linear(9 + optimizer_num * 2, 64),
            nn.Tanh(),
        ).to(device)
        self.model = nn.Sequential(
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, optimizer_num),
            nn.Softmax(dim=-1),  # probabilities over the portfolio
        ).to(device)

    def forward(self, la_state, ah_state):
        # (batch, L, 2, dim) -> (batch, 2L, dim): one row per embedder.
        flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2)
        embedded_ah = [
            embedder(flattened_ah_state[:, i, :])
            for i, embedder in enumerate(self.embedders)
        ]
        embedded_ah = torch.cat(embedded_ah, dim=-1)  # (batch, 2L)
        batch_size = ah_state.shape[0]
        x = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1)
        x = self.embedder_final(x)
        probs = self.model(x)
        return probs
class RLDASCritic(nn.Module):
    """Value network: maps (LA features, AH history) to a scalar state value.

    Mirrors ``RLDASActor``'s feature extraction; only the output head
    differs (one scalar value instead of a softmax over optimizers).

    Expected inputs:
      - la_state: (batch, 9) landscape-analysis features.
      - ah_state: (batch, optimizer_num, 2, dim) per-optimizer best/worst
        shift-vector history — assumed layout; TODO confirm against the
        agent's ``ah_vectors``.
    """

    def __init__(self, dim, optimizer_num, device):
        super().__init__()
        self.device = device
        # nn.ModuleList (not a plain list) so the embedders' parameters
        # are registered and reachable by .parameters()/the optimizer.
        self.embedders = nn.ModuleList(
            nn.Sequential(
                nn.Linear(dim, 64),
                nn.ReLU(),
                nn.Linear(64, 1),
                nn.ReLU(),
            ).to(device)
            for _ in range(2 * optimizer_num)
        )
        self.embedder_final = nn.Sequential(
            nn.Linear(9 + optimizer_num * 2, 64),
            nn.Tanh(),
        ).to(device)
        self.model = nn.Sequential(
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, 1),  # scalar state value
        ).to(device)

    def forward(self, la_state, ah_state):
        # (batch, L, 2, dim) -> (batch, 2L, dim): one row per embedder.
        flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2)
        embedded_ah = [
            embedder(flattened_ah_state[:, i, :])
            for i, embedder in enumerate(self.embedders)
        ]
        embedded_ah = torch.cat(embedded_ah, dim=-1)  # (batch, 2L)
        batch_size = ah_state.shape[0]
        feature = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1)
        feature = self.embedder_final(feature)
        val = self.model(feature.view(batch_size, -1))
        return val
Loading