Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
59d5ba2
initial commit
Jun 3, 2025
fa01598
updates with joseph and dan
Jun 3, 2025
3eeb3cf
added food respawn
Jun 4, 2025
97d9e3b
more modifications to the rewards
Jun 9, 2025
2db483c
more changes
Jun 17, 2025
73f03d2
saving current state of .h file
Jun 22, 2025
afe0d07
added observation rendering to c file
Jul 2, 2025
3ae697e
cleanup
Jul 2, 2025
9f93ba6
updated num_envs
Jul 17, 2025
f6fde22
update readme with build command
Nov 1, 2025
527672a
added latest ants env with demo rewards
matanitahdev Dec 12, 2025
fdc6060
updated simulation to use discrete moves more and to allow more movem…
matanitahdev Dec 15, 2025
96bea1b
updated reward func to only include demo and delivery rewards
matanitahdev Dec 15, 2025
34d42d1
removed pheromones and eliminated vision constraints
matanitahdev Dec 15, 2025
e32fbc3
added more complex reward vars
matanitahdev Dec 15, 2025
09482ac
updated to use same structure as target env
matanitahdev Dec 17, 2025
73b1346
reduced cone of vision and enabled pheromones
matanitahdev Dec 17, 2025
281f9f9
reducing steps in ini
matanitahdev Dec 17, 2025
3b70e9f
updated logging and improved handling
matanitahdev Dec 17, 2025
6cf2042
added pheromone range and density
matanitahdev Dec 18, 2025
55775c8
added pheromone direction to observations
Dec 24, 2025
56cdb35
added pheromone evaporation and vision changes
Jan 6, 2026
ca74e12
pulled 3.0 branch
Jan 6, 2026
bf2daae
small changes to scripts and setup.py to make PufferLib compatible wi…
matanitahdev Jan 6, 2026
843291d
Delete save_net_flat.py
matanitah Jan 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pufferlib/config/ocean/ants.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[base]
package = ocean
env_name = puffer_ants
policy_name = Policy
rnn_name = Recurrent

[vec]
num_envs = 8

[env]
num_envs = 32

[train]
total_timesteps = 100_000_000
15 changes: 15 additions & 0 deletions pufferlib/ocean/ants/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
```
puffer train puffer_ants --train.device cpu --train.optimizer adam --neptune --neptune-name "matanitah" --neptune-project "ant-sim"
```

```
puffer eval puffer_ants --load-model-path experiments/ANTS-XXX.pt --train.device cpu --train.optimizer adam --neptune
```

```
scripts/build_ocean.sh ants
```

```
python setup.py build_ext --inplace
```
3 changes: 3 additions & 0 deletions pufferlib/ocean/ants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Package entry point: expose AntsEnv from the ants module as the public API.
from pufferlib.ocean.ants.ants import AntsEnv

__all__ = ['AntsEnv']
133 changes: 133 additions & 0 deletions pufferlib/ocean/ants/ants.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/* Ants: Pure C demo file for testing the environment.
* Build it with:
* bash scripts/build_ocean.sh ants local (debug)
* bash scripts/build_ocean.sh ants fast
*
* Following the Target env pattern for consistency.
*/
#include <stdio.h>
#include "ants.h"

int main() {
int num_ants = 64;
int num_obs = 27; // Observation space: colony, food, 5 pheromones (dx, dy, direction, strength each), has_food, heading, density

AntsEnv env = {
.width = 1280,
.height = 720,
.num_ants = num_ants,
.reward_food_pickup = 0.1f,
.reward_delivery = 10.0f
};

init(&env);

// Allocate buffers manually (normally passed from Python)
env.observations = calloc(env.num_ants * num_obs, sizeof(float));
env.actions = calloc(env.num_ants, sizeof(int));
env.rewards = calloc(env.num_ants, sizeof(float));
env.terminals = calloc(env.num_ants, sizeof(unsigned char));

// Always call reset and render first
c_reset(&env);
c_render(&env);

printf("Ant Colony Demo with Pheromones, Vision & Density Awareness\n");
printf("============================================================\n");
printf("Controls:\n");
printf(" [V] - Toggle vision cone visualization\n");
printf(" [P] - Toggle pheromone range visualization\n");
printf(" [ESC] - Exit\n\n");
printf("Features:\n");
printf(" - Vision: 75px range, 60° beam (for food) - improved for exploration\n");
printf(" - Pheromone sensing: 100px range, 360° omnidirectional\n");
printf(" - Density awareness: count friendly ants in pheromone range\n");
printf(" - Automatic pheromone trails when carrying food\n");
printf(" - Pheromone evaporation (500 step lifetime) - faster to break loops\n");
printf(" - Random exploration: ants stuck >100 steps get random turns (5%% chance)\n");
printf(" - Simple heuristic AI: seek food -> return to colony\n\n");

// Main loop - exit with ESC or close window
while (!WindowShouldClose()) {
// Simple demo AI: seek food when empty, return when full
for (int i = 0; i < env.num_ants; i++) {
Ant* ant = &env.ants[i];

// Simple heuristic AI
if (ant->has_food) {
// Return to colony
Colony* colony = &env.colonies[ant->colony_id];
float angle_to_colony = get_angle(ant->position, colony->position);
float angle_diff = wrap_angle(angle_to_colony - ant->direction);

if (angle_diff > M_PI / 8) {
env.actions[i] = ACTION_TURN_RIGHT;
} else if (angle_diff < -M_PI / 8) {
env.actions[i] = ACTION_TURN_LEFT;
} else {
env.actions[i] = ACTION_MOVE_FORWARD;
}
} else {
// Seek nearest food
float closest_dist_sq = env.width * env.width;
Vector2D closest_food = {0, 0};
bool found = false;

for (int f = 0; f < env.num_food_sources; f++) {
if (env.food_sources[f].amount > 0) {
float dist_sq = distance_squared(ant->position, env.food_sources[f].position);
if (dist_sq < closest_dist_sq) {
closest_dist_sq = dist_sq;
closest_food = env.food_sources[f].position;
found = true;
}
}
}

if (found) {
float angle_to_food = get_angle(ant->position, closest_food);
float angle_diff = wrap_angle(angle_to_food - ant->direction);

if (angle_diff > M_PI / 8) {
env.actions[i] = ACTION_TURN_RIGHT;
} else if (angle_diff < -M_PI / 8) {
env.actions[i] = ACTION_TURN_LEFT;
} else {
env.actions[i] = ACTION_MOVE_FORWARD;
}
} else {
// No food visible, just move forward
env.actions[i] = ACTION_MOVE_FORWARD;
}
}
}

c_step(&env);
c_render(&env);

// Print stats every 60 frames
if (env.tick % 60 == 0) {
float success_rate = env.log.total_resets > 0
? (env.log.successful_trips / env.log.total_resets * 100.0f)
: 0.0f;
printf("Tick: %d | C1: %d (%.0f%%) | C2: %d (%.0f%%) | Efficiency: %.1f steps/food | Throughput: %.2f | Success: %.1f%%\n",
env.tick,
env.colonies[0].food_collected,
env.log.total_deliveries > 0 ? (env.log.colony1_food / env.log.total_deliveries * 100.0f) : 0.0f,
env.colonies[1].food_collected,
env.log.total_deliveries > 0 ? (env.log.colony2_food / env.log.total_deliveries * 100.0f) : 0.0f,
env.log.avg_delivery_steps,
env.log.score,
success_rate);
}
}

// Cleanup
free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
c_close(&env);

return 0;
}
Loading