Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CDB_study.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then
DIM=${DIMS[$SLURM_ARRAY_TASK_ID]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient

Expand All @@ -47,7 +47,7 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then
DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]}
echo "Running Mode: $MODE | Dimension: $DIM"

python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \
python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_DIM${DIM} \
-p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \
--cdb $CDB_VAL --n_epochs 3 --agent policy-gradient

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ uv run das <name> [options]
| `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. |
| `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. |
| `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. |
| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. |
| `-D`, `--dimensionality` | `list[int]` | `[2, 3, 5, 10, 20, 40]` | Dimensionality of problems. |
| `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. |
| `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. |

Expand Down
47 changes: 20 additions & 27 deletions dynamicalgorithmselection/agents/RLDAS_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def _update_ah_history(
self.ah_vectors[alg_idx, 1] * H + sv_worst_current
) / (H + 1)

            # Incremental mean: fold the newest worst-shift vector into the
            # running average over the H+1 samples seen so far.

self.alg_usage_counts[alg_idx] += 1

def _save_context(self, optimizer, alg_name):
Expand Down Expand Up @@ -167,25 +169,20 @@ def optimize(self, fitness_function=None, args=None):
population_x, population_y = self.initialize()
self.n_function_evaluations = INITIAL_POPSIZE

best_idx = np.argmin(population_y)
best_y_global = population_y[best_idx]
best_x_global = population_x[best_idx].copy()

self.best_so_far_y = best_y_global
self.best_so_far_x = best_x_global

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0
self.initial_cost = (
self.best_so_far_y if abs(self.best_so_far_y) > 1e-8 else 1.0
)

self.ah_vectors.fill(0.0)
self.alg_usage_counts.fill(0.0)
self.context_memory = {name: {} for name in self.alg_names}
self.context_memory["Common"] = {}

cost_new, cost_old = float(np.min(population_y)), float(np.min(population_y))
trajectory = []

clip_eps = self.options.get("ppo_eps", 0.3)
while self.n_function_evaluations < self.max_function_evaluations:
state = self.get_state(population_x, population_y)

Expand All @@ -203,7 +200,7 @@ def optimize(self, fitness_function=None, args=None):

x_best_old = population_x[np.argmin(population_y)].copy()
x_worst_old = population_x[np.argmax(population_y)].copy()
cost_old = np.copy(np.min(population_y))
cost_old = float(cost_new)

target_fes = min(
self.n_function_evaluations + self.schedule_interval,
Expand Down Expand Up @@ -237,13 +234,18 @@ def optimize(self, fitness_function=None, args=None):

x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy()
x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy()
cost_new: float = np.min(population_y)
cost_new: float = self.best_so_far_y

self._update_ah_history(
action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new
)

adc = (cost_old - cost_new) / self.initial_cost
# Update Agent Best State and History
if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new

adc = (cost_old - self.best_so_far_y) / self.initial_cost
if self.run:
self.run.log({"adc": adc})

Expand All @@ -260,24 +262,15 @@ def optimize(self, fitness_function=None, args=None):
}
)

best_y_global = min(best_y_global, cost_new)

# Update Agent Best State and History
if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

self._n_generations += 1
self._print_verbose_info(fitness, self.best_so_far_y)
print(self._n_generations)
fes_end = self.n_function_evaluations
speed_factor = self.max_function_evaluations / fes_end
speed_factor = self.max_function_evaluations / self.n_function_evaluations

for step in trajectory:
final_reward = step["adc"] * speed_factor
final_reward = max(step["adc"] * speed_factor, 0)
self.rewards.append(final_reward)
la_state, ah_state = step["state"]

Expand All @@ -301,7 +294,7 @@ def optimize(self, fitness_function=None, args=None):
self.buffer,
epochs=K,
minibatch_size=32,
clip_eps=0.2,
clip_eps=clip_eps,
value_coef=0.5,
entropy_coef=0.01,
)
Expand Down Expand Up @@ -337,7 +330,7 @@ def ppo_update(
buffer,
epochs=4,
minibatch_size=None,
clip_eps=0.2,
clip_eps=0.3,
value_coef=0.5,
entropy_coef=0.01,
):
Expand Down Expand Up @@ -406,5 +399,5 @@ def ppo_update(

self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5)
torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.1)
self.optimizer.step()
9 changes: 0 additions & 9 deletions dynamicalgorithmselection/agents/RLDAS_random_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,6 @@ def optimize(self, fitness_function=None, args=None):
population_x, population_y = self.initialize()
self.n_function_evaluations = INITIAL_POPSIZE

best_idx = np.argmin(population_y)
best_y_global = population_y[best_idx]
best_x_global = population_x[best_idx].copy()

self.best_so_far_y = best_y_global
self.best_so_far_x = best_x_global

self.history.append(self.best_so_far_y)
fitness.append(float(self.best_so_far_y))

Expand Down Expand Up @@ -179,8 +172,6 @@ def optimize(self, fitness_function=None, args=None):
action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new
)

best_y_global = min(best_y_global, cost_new)

if cost_new < self.best_so_far_y:
self.best_so_far_y = cost_new
self.best_so_far_x = x_best_new
Expand Down
19 changes: 17 additions & 2 deletions dynamicalgorithmselection/agents/agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,21 @@ def normalize(self, state, update=True):
return np.clip(normalized_state, -5.0, 5.0)


def negative_slope_coefficient(group_cost, sample_cost):
    """Negative Slope Coefficient (NSC) landscape-analysis feature.

    Pairs each population cost with the cost of its random-walk sample,
    sorts the pairs by population cost, splits them into ``m`` equal-size
    bins, and sums the slopes between consecutive bin means, keeping only
    the negative ones. A value near 0 suggests a well-behaved landscape;
    strongly negative values indicate deceptiveness.

    :param group_cost: costs of the current population, shape (n,) (array-like).
    :param sample_cost: costs of the perturbed random-walk samples, shape (n,).
    :return: float, sum of the negative inter-bin slopes (<= 0.0).
    """
    group_cost = np.asarray(group_cost)
    sample_cost = np.asarray(sample_cost)
    m = 10  # number of bins
    gs = sample_cost.shape[0]
    gs -= gs % m  # truncate so the pairs divide evenly into m bins
    if gs < m:  # not enough samples to put one pair in each bin
        return 0.0
    # Sort (group, sample) pairs by the population cost.
    pairs = np.array(sorted(zip(group_cost[:gs], sample_cost[:gs])))
    bin_group = pairs[:, 0].reshape(m, -1)
    bin_sample = pairs[:, 1].reshape(m, -1)
    Ms = np.mean(bin_group, -1)   # mean population cost per bin
    Ns = np.mean(bin_sample, -1)  # mean sample cost per bin
    # Slope between consecutive bins, clipped to <= 0; +1e-8 avoids div-by-zero.
    nsc = np.minimum((Ns[1:] - Ns[:-1]) / (Ms[1:] - Ms[:-1] + 1e-8), 0)
    return float(np.sum(nsc))


def get_la_features(agent, pop_x, pop_y):
"""
Extracts 9 Landscape Analysis features based on the logic in Population.py.
Expand Down Expand Up @@ -424,15 +439,15 @@ def get_la_features(agent, pop_x, pop_y):
random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape)

# Evaluate the random walk samples
sample_costs = [agent.fitness_function(i) for i in random_walk_samples]
sample_costs = np.array([agent.fitness_function(i) for i in random_walk_samples])
agent.n_function_evaluations += n # Increment evaluations by population size

# Calculate differences between the walk and the current population
diffs = np.array(sample_costs) - pop_y

# --- Feature 5: Negative Slope Coefficient (nsc) ---
# Proportion of steps that resulted in an improvement
f5_nsc = np.sum(diffs < 0) / n
f5_nsc = negative_slope_coefficient(pop_y, sample_cost=sample_costs)

# --- Feature 6: Average Neutral Ratio (anr) ---
# Proportion of steps that resulted in practically zero change
Expand Down
4 changes: 0 additions & 4 deletions dynamicalgorithmselection/agents/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Optional

import numpy as np

MAX_DIM = 40
Expand All @@ -14,7 +12,6 @@ def get_runtime_stats(
"""
:param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved
:param function_evaluations: max number of function evaluations during run.
:param checkpoints: list of checkpoints by their n_function_evaluations
:return: dictionary of selected run statistics, ready to dump
"""
area_under_optimization_curve = 0.0
Expand Down Expand Up @@ -43,7 +40,6 @@ def get_extreme_stats(
"""
:param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm
:param function_evaluations: max number of function evaluations during run.
:param checkpoints: list of checkpoints by their n_function_evaluations
:return: dictionary of selected run statistics, ready to dump
"""
all_improvements = []
Expand Down
110 changes: 80 additions & 30 deletions dynamicalgorithmselection/agents/ppo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,48 +142,98 @@ def forward(self, advantage, log_prob):


class RLDASNetwork(nn.Module):
    """Actor-critic wrapper for the RL-DAS agent.

    Thin container combining a policy head (``RLDASActor``) and a value
    head (``RLDASCritic``); both consume the landscape-analysis (LA)
    state and the algorithm-history (AH) state.
    """

    def __init__(self, d_dim, num_algorithms):
        """
        :param d_dim: problem dimensionality (per-vector AH feature size).
        :param num_algorithms: number of optimizers in the portfolio.
        """
        super(RLDASNetwork, self).__init__()
        self.L = num_algorithms  # portfolio size
        self.D = d_dim           # problem dimensionality

        self.actor = RLDASActor(d_dim, num_algorithms, DEVICE)
        self.critic = RLDASCritic(d_dim, num_algorithms, DEVICE)

    def forward(self, la_state, ah_state):
        """Return (action probabilities, state value) for the given states."""
        return self.actor(la_state, ah_state), self.critic(la_state, ah_state)
class RLDASActor(nn.Module):
    """Policy network: maps (LA features, AH history) to a probability
    distribution over the optimizer portfolio.

    Expected inputs:
      - la_state: (batch, 9) landscape-analysis features (the final layer
        hard-codes 9 LA features).
      - ah_state: (batch, optimizer_num, 2, dim) per-optimizer best/worst
        shift-vector history — assumed layout; TODO confirm against the
        agent's ``ah_vectors``.
    """

    def __init__(self, dim, optimizer_num, device):
        super().__init__()
        self.device = device
        self.optimizer_num = optimizer_num
        # Use nn.ModuleList (not a plain Python list) so each embedder is
        # registered as a submodule: its parameters then appear in
        # .parameters()/state_dict() and are actually trained.
        self.embedders = nn.ModuleList(
            nn.Sequential(
                nn.Linear(dim, 64),
                nn.ReLU(),
                nn.Linear(64, 1),
                nn.ReLU(),
            ).to(device)
            for _ in range(2 * optimizer_num)
        )
        # Fuses the 9 LA features with one scalar per AH vector.
        self.embedder_final = nn.Sequential(
            nn.Linear(9 + optimizer_num * 2, 64),
            nn.Tanh(),
        ).to(device)
        self.model = nn.Sequential(
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, optimizer_num),
            nn.Softmax(dim=-1),  # probabilities over the portfolio
        ).to(device)

    def forward(self, la_state, ah_state):
        # (batch, L, 2, dim) -> (batch, 2L, dim): one row per embedder.
        flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2)
        embedded_ah = [
            embedder(flattened_ah_state[:, i, :])
            for i, embedder in enumerate(self.embedders)
        ]
        embedded_ah = torch.cat(embedded_ah, dim=-1)  # (batch, 2L)
        batch_size = ah_state.shape[0]
        x = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1)
        x = self.embedder_final(x)
        probs = self.model(x)
        return probs
class RLDASCritic(nn.Module):
    """Value network: maps (LA features, AH history) to a scalar state value.

    Mirrors ``RLDASActor``'s feature extraction; only the output head
    differs (one scalar value instead of a softmax over optimizers).

    Expected inputs:
      - la_state: (batch, 9) landscape-analysis features.
      - ah_state: (batch, optimizer_num, 2, dim) per-optimizer best/worst
        shift-vector history — assumed layout; TODO confirm against the
        agent's ``ah_vectors``.
    """

    def __init__(self, dim, optimizer_num, device):
        super().__init__()
        self.device = device
        # nn.ModuleList (not a plain list) so the embedders' parameters
        # are registered and reachable by .parameters()/the optimizer.
        self.embedders = nn.ModuleList(
            nn.Sequential(
                nn.Linear(dim, 64),
                nn.ReLU(),
                nn.Linear(64, 1),
                nn.ReLU(),
            ).to(device)
            for _ in range(2 * optimizer_num)
        )
        self.embedder_final = nn.Sequential(
            nn.Linear(9 + optimizer_num * 2, 64),
            nn.Tanh(),
        ).to(device)
        self.model = nn.Sequential(
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, 1),  # scalar state value
        ).to(device)

    def forward(self, la_state, ah_state):
        # (batch, L, 2, dim) -> (batch, 2L, dim): one row per embedder.
        flattened_ah_state = torch.flatten(ah_state, start_dim=1, end_dim=2)
        embedded_ah = [
            embedder(flattened_ah_state[:, i, :])
            for i, embedder in enumerate(self.embedders)
        ]
        embedded_ah = torch.cat(embedded_ah, dim=-1)  # (batch, 2L)
        batch_size = ah_state.shape[0]
        feature = torch.cat((la_state, embedded_ah), dim=-1).view(batch_size, -1)
        feature = self.embedder_final(feature)
        val = self.model(feature.view(batch_size, -1))
        return val
Loading