Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
dba941d
inital
TBBristol Dec 1, 2025
319d30c
working demo
TBBristol Dec 2, 2025
10854dd
mmap changes, working on eval, gitignore maps files
TBBristol Dec 3, 2025
a7d1064
functioning as far as I can tell, no learning yet
TBBristol Dec 3, 2025
7b0b45b
running sweeps removed bptt sweep
TBBristol Dec 3, 2025
4c78c21
fixed clear logic in take action , added intermediated rewards
TBBristol Dec 4, 2025
9e4ffa8
intrweard into ini
TBBristol Dec 4, 2025
5b4159e
fixed get idx for puzzle to int32 to avoid wrap
TBBristol Dec 4, 2025
d69e3f9
generate maps tweaks
TBBristol Jan 1, 2026
60377d3
adding gen easy maps py
TBBristol Jan 1, 2026
854989f
gen maps make sure doesnt spawn at edghe, added int rew coeef 0.1 rem…
TBBristol Jan 1, 2026
aef6916
int targets logs
TBBristol Jan 1, 2026
bd15697
fied generate easy, penalty for box off
TBBristol Jan 1, 2026
9dfa988
map bin generation changes
TBBristol Jan 2, 2026
8714f61
Add boxoban-levels as submodule
TBBristol Jan 2, 2026
849edc5
dash diff, basic maps
TBBristol Jan 2, 2026
b28c7cf
levels add
TBBristol Jan 2, 2026
29ba0ec
demp working with difficulty etc
TBBristol Jan 2, 2026
1277ea2
custom cnn
TBBristol Jan 2, 2026
705632b
tidy and readme
TBBristol Jan 2, 2026
9f7a212
Add image to README for boxoban
TBBristol Jan 2, 2026
d963ab3
Fix game name in README
TBBristol Jan 2, 2026
60b7ef1
Fix formatting in README.md for Boxoban game
TBBristol Jan 2, 2026
1f39635
Update README with Easy difficulty level details
TBBristol Jan 2, 2026
40e418d
Vendor boxoban-levels (remove submodule)
TBBristol Jan 2, 2026
a87ba60
Merge branch 'boxoban' of https://github.com/TBBristol/PufferLib into…
TBBristol Jan 2, 2026
6c9a246
Update README to include .bin generation info
TBBristol Jan 2, 2026
18eb05b
Fix formatting of header in README.md
TBBristol Jan 2, 2026
1d4b79b
Update README with manual play instructions
TBBristol Jan 2, 2026
9870854
Update README with gameplay instructions
TBBristol Jan 2, 2026
e594ea6
Delete boxoban.dSYM directory
TBBristol Jan 2, 2026
4c3a150
resotre default ini
TBBristol Jan 2, 2026
cdc0304
Ignore dSYM artifacts
TBBristol Jan 2, 2026
8d53c46
map generation bug fix
TBBristol Jan 2, 2026
e52f2e9
training basic
TBBristol Jan 28, 2026
db5fd21
perf changes
Jan 28, 2026
8d66c6c
perf fixed?
Jan 28, 2026
ea21741
trained basic
Jan 28, 2026
5d6c300
fix map parse bug agent ontarget
Jan 29, 2026
b543745
embedds
TBBristol Jan 31, 2026
96d9401
added len coeff in ini
Feb 1, 2026
f56c243
ini
Feb 1, 2026
3b16c62
fps change
Feb 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*_maps.bin
*_valid.bin

# Annoying temp files generated by Cython
c_*.c
pufferlib/extensions.c
Expand Down Expand Up @@ -162,3 +165,5 @@ pufferlib/ocean/impulse_wars/*-release/
pufferlib/ocean/impulse_wars/debug-*/
pufferlib/ocean/impulse_wars/release-*/
pufferlib/ocean/impulse_wars/benchmark/

*.dSYM/
Empty file added .gitmodules
Empty file.
2 changes: 1 addition & 1 deletion pufferlib/config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ scale = auto
[sweep.train.minibatch_size]
distribution = uniform_pow2
min = 8192
max = 65536
max = 16384
scale = auto

[sweep.train.learning_rate]
Expand Down
77 changes: 77 additions & 0 deletions pufferlib/config/ocean/boxoban.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
[base]
package = ocean
env_name = puffer_boxoban
policy_name = Boxoban
rnn_name = Recurrent

[vec]
num_envs = 2

[env]
num_envs = 1024
difficulty = "easy"
#reward per intermediate target (once per episode)
int_r_coeff = 0.25
#moving box off target
target_loss_pen_coeff = 0.1
#neg reward per step
length_reward_coeff = 0.0

[policy]


[train]

#BASIC
#adam_beta1 = 0.9398378409770966
#adam_beta2 = 0.9989332259552188
#adam_eps = 0.00000000000206071635
#anneal_lr = true
#batch_size = "auto"
#bptt_horizon = 64
#checkpoint_interval = 200
#clip_coef = 0.11478794743865613
#ent_coef = 0.0029962808388471485
#gae_lambda = 0.8493271024211292
#gamma = 0.9993401324579252
#learning_rate = 0.014686393387259022
#max_grad_norm = 0.9813762605915642
#min_lr_ratio = 0.0919479673291089
#minibatch_size = 16384
#optimizer = "muon"
#prio_alpha = 0.9306424191723168
#prio_beta0 = 0.6438373386977116
#update_epochs = 1
#total_timesteps = 30000000
#vf_clip_coef = 0.3663806329531388
#vf_coef = 2.528717985356681
#vtrace_c_clip = 1.2791176791333148
#vtrace_rho_clip = 1.1263937056422595
#
#EASY
adam_beta1 = 0.9401745430570272
adam_beta2 = 0.9131850488636376
adam_eps = 0.00000003606344842944
anneal_lr = "true"
batch_size = "auto"
bptt_horizon = 64
clip_coef = 0.03332279377492652
ent_coef = 0.052842630147383426
gae_lambda = 0.7936070081802409
gamma = 0.9589112076898656
learning_rate = 0.012534394901687526
max_grad_norm = 2.096905570892092
max_minibatch_size = 32768
min_lr_ratio = 0.28390691472987917
minibatch_size = 16384
optimizer = "muon"
precision = "float32"
prio_alpha = 0.974402356259871
prio_beta0 = 0.9402320261892596
total_timesteps = 74257668
update_epochs = 1
use_rnn = true
vf_clip_coef = 1.5271841942808977
vf_coef = 5
vtrace_c_clip = 2.7424047105884948
vtrace_rho_clip = 2.5409738450112447
101 changes: 101 additions & 0 deletions pufferlib/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,106 @@
import pufferlib.pytorch
import pufferlib.spaces

import numpy as np
import torch
import torch.nn as nn
import pufferlib


class Boxoban(nn.Module):
    """Embedding + MLP policy for the Boxoban environment.

    Observations: always (B, 400) = 4 * (10*10), planes concatenated:
        [agent_plane(100), target_plane(100), box_plane(100), wall_plane(100)]
    Each plane is binary/float occupancy. Target+box can co-locate naturally.

    Embedding per cell:
        cell_vec = pos_embed[cell] + sum_{type present} type_embed[type]

    The 100 cell vectors are flattened and passed through a three-layer GELU
    MLP; an action head and a scalar value head read the resulting hidden
    vector.
    """

    def __init__(self, env, hidden_size=128, embed_dim=8):
        """Build the embeddings, encoder MLP and action/value heads.

        Args:
            env: Object exposing ``single_action_space``; the action head is
                chosen from its type (MultiDiscrete, Box, or anything else,
                which is treated as discrete with ``.n`` actions).
            hidden_size: Width of the encoder output fed to both heads.
            embed_dim: Size of each per-cell type/position embedding.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embed_dim = embed_dim

        action_space = env.single_action_space
        self.is_multidiscrete = isinstance(action_space, pufferlib.spaces.MultiDiscrete)
        self.is_continuous = isinstance(action_space, pufferlib.spaces.Box)

        # Fixed layout: 4 entity planes over a 10x10 grid.
        self.num_types = 4
        self.num_cells = 100
        self.obs_n = 400

        self.type_embed = nn.Embedding(self.num_types, self.embed_dim)
        self.pos_embed = nn.Embedding(self.num_cells, self.embed_dim)

        self.encoder = nn.Sequential(
            pufferlib.pytorch.layer_init(nn.Linear(self.num_cells * self.embed_dim, 2 * hidden_size)),
            nn.GELU(),
            pufferlib.pytorch.layer_init(nn.Linear(2 * hidden_size, hidden_size)),
            nn.GELU(),
            pufferlib.pytorch.layer_init(nn.Linear(hidden_size, hidden_size)),
            nn.GELU(),
        )

        if self.is_multidiscrete:
            # One flat logit vector, split per sub-action in decode_actions.
            self.action_nvec = tuple(action_space.nvec)
            num_atns = sum(self.action_nvec)
            self.decoder = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, num_atns), std=0.01)
        elif not self.is_continuous:
            num_atns = action_space.n
            self.decoder = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, num_atns), std=0.01)
        else:
            # Continuous actions: learned mean, state-independent log-std.
            self.decoder_mean = pufferlib.pytorch.layer_init(
                nn.Linear(hidden_size, action_space.shape[0]), std=0.01
            )
            self.decoder_logstd = nn.Parameter(torch.zeros(1, action_space.shape[0]))

        self.value = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, 1), std=1.0)

    def forward_eval(self, observations, state=None):
        """Encode observations and return ``(logits, values)``."""
        hidden = self.encode_observations(observations, state=state)
        logits, values = self.decode_actions(hidden)
        return logits, values

    def forward(self, observations, state=None):
        """Alias of :meth:`forward_eval`."""
        return self.forward_eval(observations, state)

    def encode_observations(self, observations, state=None):
        """Map ``(B, 400)`` plane observations to ``(B, hidden_size)`` features.

        Args:
            observations: Tensor of shape ``(B, obs_n)``; any dtype that can be
                cast to the embedding dtype.
            state: Unused; accepted for interface compatibility.

        Raises:
            ValueError: If ``observations`` is not 2-D with ``obs_n`` columns.
        """
        x = observations
        if x.ndim != 2 or x.shape[1] != self.obs_n:
            raise ValueError(f"Expected observations shape (B, {self.obs_n}), got {tuple(x.shape)}")
        batch = x.shape[0]

        # Match the embedding dtype rather than assuming float32: a half
        # observation against float32 weights (or the reverse) would otherwise
        # fail in the matmul below when autocast is not active.
        weight_dtype = self.type_embed.weight.dtype
        if x.dtype != weight_dtype:
            x = x.to(weight_dtype)

        # (B, 400) -> (B, 4, 100) -> (B, 100, 4)
        x = x.reshape(batch, self.num_types, self.num_cells).permute(0, 2, 1).contiguous()

        # Sum entity-type embeddings for present types
        type_vec = x @ self.type_embed.weight  # (B, 100, embed_dim)

        # Add position embedding; (100, embed_dim) broadcasts over the batch.
        cell_vec = type_vec + self.pos_embed.weight

        flat = cell_vec.reshape(batch, self.num_cells * self.embed_dim)
        return self.encoder(flat)

    def decode_actions(self, hidden):
        """Return ``(logits, values)`` for an encoded batch.

        ``logits`` is a tuple of per-sub-action tensors for MultiDiscrete
        spaces, a ``torch.distributions.Normal`` for continuous spaces, and a
        single logits tensor otherwise. ``values`` has shape ``(B, 1)``.
        """
        if self.is_multidiscrete:
            logits = self.decoder(hidden).split(self.action_nvec, dim=1)
        elif self.is_continuous:
            mean = self.decoder_mean(hidden)
            logstd = self.decoder_logstd.expand_as(mean)
            std = torch.exp(logstd)
            logits = torch.distributions.Normal(mean, std)
        else:
            logits = self.decoder(hidden)

        values = self.value(hidden)
        return logits, values


class Default(nn.Module):
'''Default PyTorch policy. Flattens obs and applies a linear layer.
Expand Down Expand Up @@ -79,6 +179,7 @@ def encode_observations(self, observations, state=None):
observations = torch.cat([v.view(batch_size, -1) for v in observations.values()], dim=1)
else:
observations = observations.view(batch_size, -1)
breakpoint()
return self.encoder(observations.float())

def decode_actions(self, hidden):
Expand Down
30 changes: 30 additions & 0 deletions pufferlib/ocean/boxoban/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#### BOXOBAN

A simple game based on Sokoban where the player must push all boxes onto the targets.

Boxoban-levels contains the levels for the game as .txt files. The difficulty is chosen with the environment option 'difficulty', which can be 'basic', 'easy', 'medium', 'hard' or 'unfiltered'.

Basic - only external walls and one box

Easy - only external walls and up to 4 boxes

These can both be generated using the generate_easy_maps.py script by setting its internal options and output string as required.

The hard, medium and unfiltered levels are taken from Google's Boxoban dataset, and the license info is included in the file.
These maps are not easy to generate since they need to be solvable but also interesting; however, those folders contain a very large number of maps (~1M).

Medium and unfiltered also have validation sets, though these aren't used.


## The first time each difficulty is used a .bin is generated

Play manually using the .c compiled with bash scripts/build_ocean boxoban.

You can play different difficulties by passing the difficulty as an argument, e.g. `./boxoban easy`; however, the corresponding .bin must already have been built.

<img width="315" height="342" alt="image" src="https://github.com/user-attachments/assets/f5ea4eac-ec64-4444-b54a-b06c9ef2d252" />





48 changes: 48 additions & 0 deletions pufferlib/ocean/boxoban/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#define BOXOBAN_MAPS_IMPLEMENTATION //enables mmap
#include "boxoban.h"
#define Env Boxoban
#include "../env_binding.h"

//Map stuff
// Read the required string kwarg "map_path" and hand it to
// boxoban_set_map_path(). Returns 0 on success; on failure returns -1 with a
// Python exception set (TypeError for a missing/non-string value,
// RuntimeError when boxoban_set_map_path() reports a non-zero status).
static int update_map_path(PyObject* kwargs) {
    PyObject* path_arg = PyDict_GetItemString(kwargs, "map_path");
    int is_string = (path_arg != NULL) && PyUnicode_Check(path_arg);
    if (!is_string) {
        PyErr_SetString(PyExc_TypeError, "Boxoban requires a string 'map_path' kwarg");
        return -1;
    }

    // PyUnicode_AsUTF8 sets its own exception when it returns NULL.
    const char* utf8_path = PyUnicode_AsUTF8(path_arg);
    if (!utf8_path) {
        return -1;
    }

    if (boxoban_set_map_path(utf8_path)) {
        PyErr_SetString(PyExc_RuntimeError, "Failed to set Boxoban map path");
        return -1;
    }
    return 0;
}


// Configure a Boxoban env from Python kwargs and initialise it.
//
// Reads "map_path" (via update_map_path), "size", "max_steps", "int_r_coeff",
// "target_loss_pen_coeff" and "length_reward_coeff" from kwargs, stores them
// on env, then calls init(env). `args` is not used.
//
// Returns 0 on success, -1 if the map path could not be applied or a Python
// error is pending after the kwargs were read.
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
    (void)args;
    if (update_map_path(kwargs) != 0) {
        return -1;
    }
    env->size = (int)unpack(kwargs, "size");
    env->max_steps = (int)unpack(kwargs, "max_steps");
    env->int_r_coeff = (float)unpack(kwargs, "int_r_coeff");
    env->target_loss_pen_coeff = (float)unpack(kwargs, "target_loss_pen_coeff");
    env->len_reward_coeff = (float)unpack(kwargs, "length_reward_coeff");

    // NOTE(review): assumes unpack() reports a missing or invalid kwarg through
    // the Python error indicator -- confirm against env_binding.h. If so, bail
    // out here instead of initialising the env with placeholder values and
    // returning success while an exception is pending.
    if (PyErr_Occurred()) {
        return -1;
    }

    init(env);
    return 0;
}

// Copy the Log fields into the Python dict under their reporting keys.
// Note that log->n_targets is exported as "targets_hit". Always returns 0.
static int my_log(PyObject* dict, Log* log) {
#define BOXOBAN_LOG_FIELD(key, member) assign_to_dict(dict, key, log->member)
    BOXOBAN_LOG_FIELD("perf", perf);
    BOXOBAN_LOG_FIELD("score", score);
    BOXOBAN_LOG_FIELD("episode_return", episode_return);
    BOXOBAN_LOG_FIELD("episode_length", episode_length);
    BOXOBAN_LOG_FIELD("targets_hit", n_targets);
#undef BOXOBAN_LOG_FIELD
    return 0;
}
23 changes: 23 additions & 0 deletions pufferlib/ocean/boxoban/boxoban-levels/CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution,
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
Loading
Loading