Draft
Changes from all commits
Commits
50 commits
1e0566c
Control goal behavior (#402)
daphne-cornelisse Apr 7, 2025
0734514
Set add_goal_state=False by default
daphne-cornelisse Apr 8, 2025
8ada1fe
Merge remote-tracking branch 'origin/main' into dev
daphne-cornelisse Apr 15, 2025
ddc8571
Feat/vbd amortize (#409)
nadarenator Apr 22, 2025
b9c5496
Improved reward conditioning and waypoint following support (#391)
daphne-cornelisse Apr 23, 2025
cc4d0f2
Add evaluation code for WOSAC (#411)
zzx9636 Apr 24, 2025
8ae4903
add 2025 wosac eval (#420)
zzx9636 Apr 24, 2025
5e2a099
Better warmup steps handling (#422)
nadarenator Apr 26, 2025
f08d2f0
Refactor: Unify guidance mechanisms (#421)
daphne-cornelisse Apr 28, 2025
a7512ab
Add WOSAC initialization modes (#426)
daphne-cornelisse Apr 29, 2025
131626b
Better agent POV visualization and add LiDAr (#424)
daphne-cornelisse Apr 29, 2025
47713ff
Hot model fix
daphne-cornelisse May 1, 2025
e7f3bcb
Support for online guidance with `vbd_online` (#431)
daphne-cornelisse May 2, 2025
f965066
Integrate optional smoothening pipeline for guidance data. (#434)
daphne-cornelisse May 2, 2025
25f22d1
Temporary fix: Access average z position (elevation) from log replays…
daphne-cornelisse May 2, 2025
1777d0c
Res/scaling (#436)
daphne-cornelisse May 8, 2025
4e10440
model looks kinda okay? (#439)
eugenevinitsky May 9, 2025
e0a32c8
Ev/memory decrease (#442)
eugenevinitsky May 10, 2025
1cad3fc
update log for each scenario (#433)
zzx9636 May 12, 2025
e99ec54
Bug fixes to get rid of large position values with `vbd_online` (#435)
daphne-cornelisse May 12, 2025
5fa3c37
RL training improvements (#440)
daphne-cornelisse May 13, 2025
f1dd3be
make vbd 91 steps (#446)
nadarenator May 13, 2025
605cd4b
Fix dynamics model and some setting updates (#447)
daphne-cornelisse May 13, 2025
473e3e8
wosac dataset (#448)
nadarenator May 15, 2025
23f91f4
Intermediate update (#449)
daphne-cornelisse May 15, 2025
f0ad20f
More updates (#450)
daphne-cornelisse May 15, 2025
58620b1
Small fix
daphne-cornelisse May 15, 2025
2cffa6a
fix the wosac eval
zzx9636 May 15, 2025
ffbb5f3
Merge branch 'dev' of github.com:Emerge-Lab/gpudrive into dev
zzx9636 May 15, 2025
ad3ce01
fix amortization script (#452)
nadarenator May 27, 2025
56529c2
Added configs, params and code for the view cone setting (#454)
mpragnay May 30, 2025
3db4deb
Type-aware action space (#455)
daphne-cornelisse Jun 1, 2025
60081e2
Fix out of range error for VBD trajectory (#456)
rjs02 Jun 4, 2025
bc62868
simple occlusion check working
rjs02 Jun 2, 2025
442b210
add occlusion check with nested loop instead of bvh
rjs02 Jun 4, 2025
e7ebc10
ray trace against 8 corner points instead of only center
rjs02 Jun 4, 2025
6f28c63
change vector to array for gpu compatibility
rjs02 Jun 4, 2025
254deb4
resolved rebase conflicts with dev
rjs02 Jun 5, 2025
1138290
add action for head tilt animation
rjs02 Jun 5, 2025
909655f
change line style
rjs02 Jun 6, 2025
cd3ccf3
update config files to new parameters
rjs02 Jun 8, 2025
0b674cb
set full view and no occlusion as default
rjs02 Jun 8, 2025
39aa469
add addtional sampling points for occlusion check
rjs02 Jun 8, 2025
aecae0c
rename parameter and add documentation
rjs02 Jun 8, 2025
ef57e8b
Merge pull request #460 from Emerge-Lab/rs/occluded-obs
rjs02 Jun 8, 2025
f492dd0
pass config parameters into config object
rjs02 Jun 12, 2025
957f57e
Merge pull request #463 from Emerge-Lab/rs/fix-pufferenv-config
rjs02 Jun 13, 2025
b705eb8
Integrate traffic light states (#425)
daphne-cornelisse Jun 16, 2025
1f58ba6
[FIX] add back missing _set_continuous_action_space (#470)
EllingtonKirby Jun 20, 2025
97190c7
Minor fixes for partial observability (#489)
rjs02 Jul 17, 2025
13 changes: 0 additions & 13 deletions .env.template

This file was deleted.

14 changes: 12 additions & 2 deletions .gitignore
@@ -27,9 +27,15 @@ data/raw/*
data/processed/validation/*
data/processed/training/*
data/processed/testing/*
data/processed/sampled/*
data/processed/validation_interactive/*
data/processed/pop_play/*
data/processed/hand_designed/*
analyze/figures/*
figures/
checkpoints/
figures_data/
data/other/*
wosac/
data/processed/validation_random/*

# Logging
/wandb
@@ -252,6 +258,10 @@ examples/experimental/logs/*
# Sbatch scripts
*.sh

# Dataset
wosac/
other/

# Videos
videos/
output_videos_larger_dataset/
44 changes: 26 additions & 18 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -8,38 +8,46 @@ model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 75 # Number of parallel environments
k_unique_scenes: 75 # Number of unique scenes to sample from
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
add_reference_pos_xy: false
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
reward_type: "weighted_combination" # Options: "weighted_combination", "reward_conditioned"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
init_mode: all_non_trivial

# If reward_type is "reward_conditioned", the following parameters are used
condition_mode: random
collision_weight_lb: -3.0
collision_weight_ub: 0.01
goal_achieved_weight_lb: 1.0
goal_achieved_weight_ub: 3.0
off_road_weight_lb: -3.0
off_road_weight_ub: 0.0

dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
goal_behavior: "remove" # Options: "remove", "stop", "ignore"
dist_to_goal_threshold: 2.0
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
init_steps: 11
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false
init_steps: 0

wandb:
entity: ""
project: "gpudrive"
group: "test"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -53,16 +61,16 @@ train:
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_scenes: true
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 2_000_000
sample_with_replacement: true
shuffle_dataset: false

# # # PPO # # #
torch_deterministic: false
total_timesteps: 1_000_000_000
batch_size: 131_072
total_timesteps: 2_000_000_000
batch_size: 262_144
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: false
Expand All @@ -88,14 +96,14 @@ train:
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 400 # Save policy every k iterations
checkpoint_interval: 500 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_3d: true # Render simulator state in 3d or 2d
render_interval: 1 # Render every k iterations
render_k_scenarios: 10 # Number of scenarios to render
render_3d: false # Render simulator state in 3d or 2d
render_interval: 10 # Render every k iterations
render_k_scenarios: 1 # Number of scenarios to render
render_format: "mp4" # Options: gif, mp4
render_fps: 15 # Frames per second
zoom_radius: 50
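The diff above also introduces lower/upper bounds (*_lb / *_ub) that only apply when reward_type is "reward_conditioned" and condition_mode is "random". Below is a minimal sketch of how per-episode reward weights could be drawn from those bounds; the helper name sample_reward_weights and the uniform sampling are illustrative assumptions, not the repository's actual implementation.

import random

def sample_reward_weights(env_cfg: dict) -> dict:
    # Hypothetical helper: draw one set of reward weights per episode,
    # uniformly between the *_lb / *_ub bounds from the YAML above.
    return {
        "collision_weight": random.uniform(
            env_cfg["collision_weight_lb"], env_cfg["collision_weight_ub"]
        ),
        "goal_achieved_weight": random.uniform(
            env_cfg["goal_achieved_weight_lb"], env_cfg["goal_achieved_weight_ub"]
        ),
        "off_road_weight": random.uniform(
            env_cfg["off_road_weight_lb"], env_cfg["off_road_weight_ub"]
        ),
    }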
131 changes: 131 additions & 0 deletions baselines/ppo/config/ppo_guided_autonomy.yaml
@@ -0,0 +1,131 @@
mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/wosac/validation_interactive/json
continue_training: false
model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
add_previous_action: true

# Guidance through expert suggestions
guidance: true # If true, the agent will be guided by expert suggestions
guidance_mode: "log_replay" # Options: "log_replay", "vbd_amortized", "vbd_online"
add_reference_pos_xy: true # If true, a reference path is added to the ego observation
add_reference_speed: true # If true, the reference speeds are added to the ego observation
add_reference_heading: true # If true, the reference headings are added to the ego observation
smoothen_trajectory: true # If true, the velocities and headings are smoothed
guidance_dropout_prob: 0.0 # Probability of dropping out guidance points

# Reward function
reward_type: "guided_autonomy"
collision_weight: -0.1
off_road_weight: -0.1
guidance_speed_weight: 0.005
guidance_heading_weight: 0.005
smoothness_weight: 0.0001

init_mode: wosac_train
dynamics_model: "classic"
remove_non_vehicles: false
collision_behavior: "ignore"
goal_behavior: "ignore"
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
view_cone_half_angle: 3.14159 # -> 360° total view field
view_occlude_objects: false
action_space_steer_disc: 13
action_space_accel_disc: 7
action_space_head_tilt_disc: 1
vehicle_steer_range: [-1.57, 1.57] # pi/2 = 1.57, pi/3 = 1.05
vehicle_accel_range: [-4.0, 4.0]
head_tilt_action_range: [-0.7854, 0.7854] # radians (±45°)
init_steps: 0 # Warmup steps
goal_achieved_weight: 0.0

wandb:
entity: ""
project: "humanlike"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: guidance_logs # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 1
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
sample_with_replacement: true
shuffle_dataset: true
file_prefix: ""

# # # PPO # # #
torch_deterministic: false
total_timesteps: 4_000_000_000
batch_size: 65536
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 1.0
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null

# # # Logging # # #
log_window: 100
track_realism_metrics: true # Log human-like metrics
track_n_worlds: 3 # Number of worlds to track

# # # Network # # #
network:
embed_dim: 256 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 200 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: true # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 10 # Render every k iterations
render_every_t: 5 # Render every k timesteps
render_k_scenarios: 1 # Number of scenarios to render
render_agent_idx: [0] # Agent observations to render
render_format: "mp4" # Options: gif, mp4
render_fps: 5 # Frames per second
zoom_radius: 100
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
num_workers: 1
env_batch_size: 1
zero_copy: false
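The "guided_autonomy" reward in this config combines collision and off-road penalties with terms that keep the agent close to the reference (guidance) speed and heading, plus a small smoothness penalty. The sketch below shows one plausible weighted combination under those weights; the exact error terms, signs, and scaling used in GPUDrive may differ, and the function name is hypothetical.

def guided_autonomy_reward(cfg, collided, off_road, speed_err, heading_err, jerk):
    # Illustrative combination of the weights named in the YAML above:
    # penalize collisions/off-road events and deviations from the
    # reference speed/heading, plus a small smoothness (jerk) penalty.
    return (
        cfg["collision_weight"] * float(collided)
        + cfg["off_road_weight"] * float(off_road)
        - cfg["guidance_speed_weight"] * abs(speed_err)
        - cfg["guidance_heading_weight"] * abs(heading_err)
        - cfg["smoothness_weight"] * abs(jerk)
    )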
114 changes: 114 additions & 0 deletions baselines/ppo/config/ppo_population.yaml
@@ -0,0 +1,114 @@
mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/pop_play
continue_training: false
model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 10 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "guided_autonomy"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
init_mode: all_non_trivial

# If reward_type is "reward_conditioned", the following parameters are used
randomize_rewards: true
condition_mode: random # Options: random, fixed
collision_weight_lb: -3.0
collision_weight_ub: 0.0
goal_achieved_weight_lb: 1.0
goal_achieved_weight_ub: 3.0
off_road_weight_lb: -3.0
off_road_weight_ub: 0.0

dynamics_model: "classic"
collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
dist_to_goal_threshold: 2.0
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
init_steps: 0 # Warmup steps

wandb:
entity: ""
project: "kshotagents"
group: "debug_mini"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 1
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 2_000_000
sample_with_replacement: false
shuffle_dataset: false

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_000_000
batch_size: 131072
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 0.99
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.001
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
log_window: 1000

# # # Network # # #
network:
embed_dim: 64 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 250 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 50 # Render every k iterations
render_k_scenarios: 1 # Number of scenarios to render
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
zoom_radius: 100
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
num_workers: 1
env_batch_size: 1
zero_copy: false
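Both new config files follow the same top-level layout (environment, wandb, train, vec). Below is a minimal sketch of loading one of them and reading the environment overrides, assuming a plain PyYAML load; the training scripts themselves may merge these values into the pygpudrive defaults differently.

import yaml

# Load a config added in this PR and inspect the environment overrides.
with open("baselines/ppo/config/ppo_population.yaml") as f:
    cfg = yaml.safe_load(f)

env_cfg = cfg["environment"]
print(env_cfg["reward_type"], env_cfg["num_worlds"], env_cfg["k_unique_scenes"])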