Draft
Changes from all commits
Commits
50 commits
1e0566c
Control goal behavior (#402)
daphne-cornelisse Apr 7, 2025
0734514
Set add_goal_state=False by default
daphne-cornelisse Apr 8, 2025
8ada1fe
Merge remote-tracking branch 'origin/main' into dev
daphne-cornelisse Apr 15, 2025
ddc8571
Feat/vbd amortize (#409)
nadarenator Apr 22, 2025
b9c5496
Improved reward conditioning and waypoint following support (#391)
daphne-cornelisse Apr 23, 2025
cc4d0f2
Add evaluation code for WOSAC (#411)
zzx9636 Apr 24, 2025
8ae4903
add 2025 wosac eval (#420)
zzx9636 Apr 24, 2025
5e2a099
Better warmup steps handling (#422)
nadarenator Apr 26, 2025
f08d2f0
Refactor: Unify guidance mechanisms (#421)
daphne-cornelisse Apr 28, 2025
a7512ab
Add WOSAC initialization modes (#426)
daphne-cornelisse Apr 29, 2025
131626b
Better agent POV visualization and add LiDAr (#424)
daphne-cornelisse Apr 29, 2025
47713ff
Hot model fix
daphne-cornelisse May 1, 2025
e7f3bcb
Support for online guidance with `vbd_online` (#431)
daphne-cornelisse May 2, 2025
f965066
Integrate optional smoothening pipeline for guidance data. (#434)
daphne-cornelisse May 2, 2025
25f22d1
Temporary fix: Access average z position (elevation) from log replays…
daphne-cornelisse May 2, 2025
1777d0c
Res/scaling (#436)
daphne-cornelisse May 8, 2025
4e10440
model looks kinda okay? (#439)
eugenevinitsky May 9, 2025
e0a32c8
Ev/memory decrease (#442)
eugenevinitsky May 10, 2025
1cad3fc
update log for each scenario (#433)
zzx9636 May 12, 2025
e99ec54
Bug fixes to get rid of large position values with `vbd_online` (#435)
daphne-cornelisse May 12, 2025
5fa3c37
RL training improvements (#440)
daphne-cornelisse May 13, 2025
f1dd3be
make vbd 91 steps (#446)
nadarenator May 13, 2025
605cd4b
Fix dynamics model and some setting updates (#447)
daphne-cornelisse May 13, 2025
473e3e8
wosac dataset (#448)
nadarenator May 15, 2025
23f91f4
Intermediate update (#449)
daphne-cornelisse May 15, 2025
f0ad20f
More updates (#450)
daphne-cornelisse May 15, 2025
58620b1
Small fix
daphne-cornelisse May 15, 2025
2cffa6a
fix the wosac eval
zzx9636 May 15, 2025
ffbb5f3
Merge branch 'dev' of github.com:Emerge-Lab/gpudrive into dev
zzx9636 May 15, 2025
ad3ce01
fix amortization script (#452)
nadarenator May 27, 2025
56529c2
Added configs, params and code for the view cone setting (#454)
mpragnay May 30, 2025
3db4deb
Type-aware action space (#455)
daphne-cornelisse Jun 1, 2025
60081e2
Fix out of range error for VBD trajectory (#456)
rjs02 Jun 4, 2025
bc62868
simple occlusion check working
rjs02 Jun 2, 2025
442b210
add occlusion check with nested loop instead of bvh
rjs02 Jun 4, 2025
e7ebc10
ray trace against 8 corner points instead of only center
rjs02 Jun 4, 2025
6f28c63
change vector to array for gpu compatibility
rjs02 Jun 4, 2025
254deb4
resolved rebase conflicts with dev
rjs02 Jun 5, 2025
1138290
add action for head tilt animation
rjs02 Jun 5, 2025
909655f
change line style
rjs02 Jun 6, 2025
cd3ccf3
update config files to new parameters
rjs02 Jun 8, 2025
0b674cb
set full view and no occlusion as default
rjs02 Jun 8, 2025
39aa469
add addtional sampling points for occlusion check
rjs02 Jun 8, 2025
aecae0c
rename parameter and add documentation
rjs02 Jun 8, 2025
ef57e8b
Merge pull request #460 from Emerge-Lab/rs/occluded-obs
rjs02 Jun 8, 2025
f492dd0
pass config parameters into config object
rjs02 Jun 12, 2025
957f57e
Merge pull request #463 from Emerge-Lab/rs/fix-pufferenv-config
rjs02 Jun 13, 2025
b705eb8
Integrate traffic light states (#425)
daphne-cornelisse Jun 16, 2025
1f58ba6
[FIX] add back missing _set_continuous_action_space (#470)
EllingtonKirby Jun 20, 2025
97190c7
Minor fixes for partial observability (#489)
rjs02 Jul 17, 2025
13 changes: 0 additions & 13 deletions .env.template

This file was deleted.

14 changes: 12 additions & 2 deletions .gitignore
@@ -27,9 +27,15 @@ data/raw/*
data/processed/validation/*
data/processed/training/*
data/processed/testing/*
data/processed/sampled/*
data/processed/validation_interactive/*
data/processed/pop_play/*
data/processed/hand_designed/*
analyze/figures/*
figures/
checkpoints/
figures_data/
data/other/*
wosac/
data/processed/validation_random/*

# Logging
/wandb
@@ -252,6 +258,10 @@ examples/experimental/logs/*
# Sbatch scripts
*.sh

# Dataset
wosac/
other/

# Videos
videos/
output_videos_larger_dataset/
44 changes: 26 additions & 18 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -8,38 +8,46 @@ model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 75 # Number of parallel environments
k_unique_scenes: 75 # Number of unique scenes to sample from
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
add_reference_pos_xy: false
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
reward_type: "weighted_combination" # Options: "weighted_combination", "reward_conditioned"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
init_mode: all_non_trivial

# If reward_type is "reward_conditioned", the following parameters are used
condition_mode: random
collision_weight_lb: -3.0
collision_weight_ub: 0.01
goal_achieved_weight_lb: 1.0
goal_achieved_weight_ub: 3.0
off_road_weight_lb: -3.0
off_road_weight_ub: 0.0

dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
goal_behavior: "remove" # Options: "remove", "stop", "ignore"
dist_to_goal_threshold: 2.0
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
init_steps: 11
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false
init_steps: 0

wandb:
entity: ""
project: "gpudrive"
group: "test"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -53,16 +61,16 @@ train:
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_scenes: true
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 2_000_000
sample_with_replacement: true
shuffle_dataset: false

# # # PPO # # #
torch_deterministic: false
total_timesteps: 1_000_000_000
batch_size: 131_072
total_timesteps: 2_000_000_000
batch_size: 262_144
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: false
Expand All @@ -88,14 +96,14 @@ train:
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 400 # Save policy every k iterations
checkpoint_interval: 500 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_3d: true # Render simulator state in 3d or 2d
render_interval: 1 # Render every k iterations
render_k_scenarios: 10 # Number of scenarios to render
render_3d: false # Render simulator state in 3d or 2d
render_interval: 10 # Render every k iterations
render_k_scenarios: 1 # Number of scenarios to render
render_format: "mp4" # Options: gif, mp4
render_fps: 15 # Frames per second
zoom_radius: 50
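The diff above also introduces lower/upper bounds (*_lb / *_ub) that only apply when reward_type is "reward_conditioned" and condition_mode is "random". Below is a minimal sketch of how per-episode reward weights could be drawn from those bounds; the helper name sample_reward_weights and the uniform sampling are illustrative assumptions, not the repository's actual implementation.

import random

def sample_reward_weights(env_cfg: dict) -> dict:
    # Hypothetical helper: draw one set of reward weights per episode,
    # uniformly between the *_lb / *_ub bounds from the YAML above.
    return {
        "collision_weight": random.uniform(
            env_cfg["collision_weight_lb"], env_cfg["collision_weight_ub"]
        ),
        "goal_achieved_weight": random.uniform(
            env_cfg["goal_achieved_weight_lb"], env_cfg["goal_achieved_weight_ub"]
        ),
        "off_road_weight": random.uniform(
            env_cfg["off_road_weight_lb"], env_cfg["off_road_weight_ub"]
        ),
    }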
131 changes: 131 additions & 0 deletions baselines/ppo/config/ppo_guided_autonomy.yaml
@@ -0,0 +1,131 @@
mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/wosac/validation_interactive/json
continue_training: false
model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
add_previous_action: true

# Guidance through expert suggestions
guidance: true # If true, the agent will be guided by expert suggestions
guidance_mode: "log_replay" # Options: "log_replay", "vbd_amortized", "vbd_online"
add_reference_pos_xy: true # If true, a reference path is added to the ego observation
add_reference_speed: true # If true, the reference speeds are added to the ego observation
add_reference_heading: true # If true, the reference headings are added to the ego observation
smoothen_trajectory: true # If true, the velocities and headings are smoothed
guidance_dropout_prob: 0.0 # Probability of dropping out guidance points

# Reward function
reward_type: "guided_autonomy"
collision_weight: -0.1
off_road_weight: -0.1
guidance_speed_weight: 0.005
guidance_heading_weight: 0.005
smoothness_weight: 0.0001

init_mode: wosac_train
dynamics_model: "classic"
remove_non_vehicles: false
collision_behavior: "ignore"
goal_behavior: "ignore"
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
view_cone_half_angle: 3.14159 # -> 360° total view field
view_occlude_objects: false
action_space_steer_disc: 13
action_space_accel_disc: 7
action_space_head_tilt_disc: 1
vehicle_steer_range: [-1.57, 1.57] # pi/2 = 1.57, pi/3 = 1.05
vehicle_accel_range: [-4.0, 4.0]
head_tilt_action_range: [-0.7854, 0.7854] # radians (±45°)
init_steps: 0 # Warmup steps
goal_achieved_weight: 0.0

wandb:
entity: ""
project: "humanlike"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: guidance_logs # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 1
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
sample_with_replacement: true
shuffle_dataset: true
file_prefix: ""

# # # PPO # # #
torch_deterministic: false
total_timesteps: 4_000_000_000
batch_size: 65536
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 1.0
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null

# # # Logging # # #
log_window: 100
track_realism_metrics: true # Log human-like metrics
track_n_worlds: 3 # Number of worlds to track

# # # Network # # #
network:
embed_dim: 256 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 200 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: true # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 10 # Render every k iterations
render_every_t: 5 # Render every k timesteps
render_k_scenarios: 1 # Number of scenarios to render
render_agent_idx: [0] # Agent observations to render
render_format: "mp4" # Options: gif, mp4
render_fps: 5 # Frames per second
zoom_radius: 100
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
num_workers: 1
env_batch_size: 1
zero_copy: false
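The "guided_autonomy" reward in this config combines collision and off-road penalties with terms that keep the agent close to the reference (guidance) speed and heading, plus a small smoothness penalty. The sketch below shows one plausible weighted combination under those weights; the exact error terms, signs, and scaling used in GPUDrive may differ, and the function name is hypothetical.

def guided_autonomy_reward(cfg, collided, off_road, speed_err, heading_err, jerk):
    # Illustrative combination of the weights named in the YAML above:
    # penalize collisions/off-road events and deviations from the
    # reference speed/heading, plus a small smoothness (jerk) penalty.
    return (
        cfg["collision_weight"] * float(collided)
        + cfg["off_road_weight"] * float(off_road)
        - cfg["guidance_speed_weight"] * abs(speed_err)
        - cfg["guidance_heading_weight"] * abs(heading_err)
        - cfg["smoothness_weight"] * abs(jerk)
    )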
114 changes: 114 additions & 0 deletions baselines/ppo/config/ppo_population.yaml
@@ -0,0 +1,114 @@
mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/pop_play
continue_training: false
model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 10 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "guided_autonomy"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
init_mode: all_non_trivial

# If reward_type is "reward_conditioned", the following parameters are used
randomize_rewards: true
condition_mode: random # Options: random, fixed
collision_weight_lb: -3.0
collision_weight_ub: 0.0
goal_achieved_weight_lb: 1.0
goal_achieved_weight_ub: 3.0
off_road_weight_lb: -3.0
off_road_weight_ub: 0.0

dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
dist_to_goal_threshold: 2.0
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
init_steps: 0 # Warmup steps

wandb:
entity: ""
project: "kshotagents"
group: "debug_mini"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 1
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 2_000_000
sample_with_replacement: false
shuffle_dataset: false

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_000_000
batch_size: 131072
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 0.99
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.001
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
log_window: 1000

# # # Network # # #
network:
embed_dim: 64 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 250 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 50 # Render every k iterations
render_k_scenarios: 1 # Number of scenarios to render
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
zoom_radius: 100
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
num_workers: 1
env_batch_size: 1
zero_copy: false
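Both new config files follow the same top-level layout (environment, wandb, train, vec). Below is a minimal sketch of loading one of them and reading the environment overrides, assuming a plain PyYAML load; the training scripts themselves may merge these values into the pygpudrive defaults differently.

import yaml

# Load a config added in this PR and inspect the environment overrides.
with open("baselines/ppo/config/ppo_population.yaml") as f:
    cfg = yaml.safe_load(f)

env_cfg = cfg["environment"]
print(env_cfg["reward_type"], env_cfg["num_worlds"], env_cfg["k_unique_scenes"])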