From daa6ef408b179fe1cb1332277166e138e6786bad Mon Sep 17 00:00:00 2001
From: Ellington Kirby
Date: Tue, 3 Jun 2025 16:01:29 +0000
Subject: [PATCH 1/3] conditional import jaxlib.xla

---
 gpudrive/visualize/core.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gpudrive/visualize/core.py b/gpudrive/visualize/core.py
index f961f82a5..0e6a278ad 100644
--- a/gpudrive/visualize/core.py
+++ b/gpudrive/visualize/core.py
@@ -10,7 +10,6 @@
 from mpl_toolkits.mplot3d import Axes3D
 from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection
 from matplotlib.colors import ListedColormap
-from jaxlib.xla_extension import ArrayImpl
 import numpy as np
 import madrona_gpudrive
 from gpudrive.visualize import utils
@@ -78,10 +77,12 @@ def initialize_static_scenario_data(self, controlled_agent_mask):
         )
 
         self.controlled_agent_mask = controlled_agent_mask
-        if isinstance(controlled_agent_mask, ArrayImpl):
-            self.controlled_agent_mask = torch.from_numpy(
-                np.array(controlled_agent_mask)
-            )
+        if self.backend == "jax":
+            from jaxlib.xla_extension import ArrayImpl
+            if isinstance(controlled_agent_mask, ArrayImpl):
+                self.controlled_agent_mask = torch.from_numpy(
+                    np.array(controlled_agent_mask)
+                )
 
         self.controlled_agent_mask = self.controlled_agent_mask.to(self.device)
 

From 1c5b2386ba1f3b09cb971ea96b9ab7986142d70a Mon Sep 17 00:00:00 2001
From: Ellington Kirby
Date: Tue, 3 Jun 2025 16:05:51 +0000
Subject: [PATCH 2/3] [DEV] add expert replay evaluation to get_model_performance

---
 examples/experimental/config/eval_config.yaml |  4 +-
 .../config/expert_replay_config.yaml          | 37 +++++++++++++++++++
 .../experimental/config/model_config.yaml     |  4 +-
 examples/experimental/eval_utils.py           | 33 +++++++++++------
 .../experimental/get_model_performance.py     |  2 +
 5 files changed, 65 insertions(+), 15 deletions(-)
 create mode 100644 examples/experimental/config/expert_replay_config.yaml

diff --git a/examples/experimental/config/eval_config.yaml b/examples/experimental/config/eval_config.yaml
index 48202ab2b..8a8a5907d 100644
--- a/examples/experimental/config/eval_config.yaml
+++ b/examples/experimental/config/eval_config.yaml
@@ -3,7 +3,8 @@ test_dataset_size: 10_000 # Number of test scenarios to evaluate on
 
 # Environment settings
 train_dir: data/processed/training
-test_dir: data/processed/validation
+test_dir: data/processed/validation
+file_prefix: null
 
 num_worlds: 50 # Number of parallel environments for evaluation
 max_controlled_agents: 64 # Maximum number of agents controlled by the model.
@@ -26,6 +27,7 @@ obs_radius: 50.0 # Visibility radius of the agents
 init_roadgraph: False
 render_3d: True
 
+action_type: "discrete"
 # Number of discretizations in the action space
 # Note: Make sure that this equals the discretizations that the policy
 # has been trained with
diff --git a/examples/experimental/config/expert_replay_config.yaml b/examples/experimental/config/expert_replay_config.yaml
new file mode 100644
index 000000000..213fa4840
--- /dev/null
+++ b/examples/experimental/config/expert_replay_config.yaml
@@ -0,0 +1,37 @@
+res_path: examples/experimental/dataframes # Store dataframes here
+test_dataset_size: 300 # Number of test scenarios to evaluate on
+
+# Environment settings
+train_dir: data/processed/training
+test_dir: data/processed/validation
+file_prefix: nuplan
+
+num_worlds: 100 # Number of parallel environments for evaluation
+max_controlled_agents: 64 # Maximum number of agents controlled by the model.
+ego_state: true
+road_map_obs: true
+partner_obs: true
+norm_obs: true
+remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
+lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
+reward_type: "weighted_combination"
+collision_weight: -0.75
+off_road_weight: -0.75
+goal_achieved_weight: 1.0
+dynamics_model: "delta_local"
+collision_behavior: "ignore" # Options: "remove", "stop"
+dist_to_goal_threshold: 2.0
+polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 is use all closest points, 1 maximum sparsity), needs to be balanced with kMaxAgentMapObservationsCount
+sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
+obs_radius: 50.0 # Visibility radius of the agents
+init_roadgraph: False
+render_3d: True
+
+action_type: "continuous"
+# Number of discretizations in the action space
+# Note: Make sure that this equals the discretizations that the policy
+# has been trained with
+action_space_steer_disc: 13
+action_space_accel_disc: 7
+
+device: "cuda" # Options: "cpu", "cuda"
diff --git a/examples/experimental/config/model_config.yaml b/examples/experimental/config/model_config.yaml
index 0f23a2432..1e97a5f7b 100644
--- a/examples/experimental/config/model_config.yaml
+++ b/examples/experimental/config/model_config.yaml
@@ -1,7 +1,7 @@
 models_path: examples/experimental/models
 
 models:
-  - name: model_PPO____R_10000__02_27_09_19_10_626_003200
-    train_dataset_size: 10_000
+  - name: expert_replay
+    train_dataset_size: 1000
     wandb: null
     trained_on: null
\ No newline at end of file
diff --git a/examples/experimental/eval_utils.py b/examples/experimental/eval_utils.py
index 675f9afe1..3e49abecd 100644
--- a/examples/experimental/eval_utils.py
+++ b/examples/experimental/eval_utils.py
@@ -35,6 +35,9 @@ def __call__(self, obs, deterministic=False):
         )
         return random_action, None, None, None
 
+class ExpertReplayPolicy:
+    def __init__(self):
+        pass
 
 def load_policy(path_to_cpt, model_name, device, env=None):
     """Load a policy from a given path."""
@@ -42,7 +45,8 @@ def load_policy(path_to_cpt, model_name, device, env=None):
     # Load the saved checkpoint
     if model_name == "random_baseline":
         return RandomPolicy(env.action_space.n)
-
+    if model_name == "expert_replay":
+        return ExpertReplayPolicy()
     else:  # Load a trained model
         saved_cpt = torch.load(
             f=f"{path_to_cpt}/{model_name}.pt",
@@ -110,22 +114,26 @@ def rollout(
 
     control_mask = env.cont_agent_mask
     live_agent_mask = control_mask.clone()
-
+    expert_actions, _, _, _ = env.get_expert_actions()
+
     for time_step in range(episode_len):
         print(f't: {time_step}')
 
         # Get actions for active agents
         if live_agent_mask.any():
-            action, _, _, _ = policy(
-                next_obs[live_agent_mask], deterministic=deterministic
-            )
-
-            # Insert actions into a template
-            action_template = torch.zeros(
-                (num_worlds, max_agent_count), dtype=torch.int64, device=device
-            )
-            action_template[live_agent_mask] = action.to(device)
+            if isinstance(policy, ExpertReplayPolicy):
+                action_template = expert_actions[:, :, time_step, :]
+            else:
+                action, _, _, _ = policy(
+                    next_obs[live_agent_mask], deterministic=deterministic
+                )
+
+                # Insert actions into a template
+                action_template = torch.zeros(
+                    (num_worlds, max_agent_count), dtype=torch.int64, device=device
+                )
+                action_template[live_agent_mask] = action.to(device)
 
         # Step the environment
         env.step_dynamics(action_template)
@@ -274,7 +282,8 @@ def make_env(config, train_loader, render_3d=False):
         data_loader=train_loader,
         max_cont_agents=config.max_controlled_agents,
         device=config.device,
-        render_config=render_config
+        render_config=render_config,
+        action_type=config.action_type,
     )
 
     return env
diff --git a/examples/experimental/get_model_performance.py b/examples/experimental/get_model_performance.py
index 70443d852..9cad80174 100644
--- a/examples/experimental/get_model_performance.py
+++ b/examples/experimental/get_model_performance.py
@@ -64,6 +64,7 @@ def set_seed(seed: int):
         else 1000,
         sample_with_replacement=False,
         shuffle=False,
+        file_prefix=eval_config.file_prefix
     )
 
     test_loader = SceneDataLoader(
@@ -74,6 +75,7 @@ def set_seed(seed: int):
         else 1000,
         sample_with_replacement=False,
         shuffle=True,
+        file_prefix=eval_config.file_prefix
    )
 
     # Rollouts

From 24b701f4444cdc2ab317e36d24a62f64cb2cde57 Mon Sep 17 00:00:00 2001
From: Ellington Kirby
Date: Tue, 3 Jun 2025 16:38:50 +0000
Subject: [PATCH 3/3] revert model_config and add comment

---
 examples/experimental/config/eval_config.yaml  | 2 +-
 examples/experimental/config/model_config.yaml | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/experimental/config/eval_config.yaml b/examples/experimental/config/eval_config.yaml
index 8a8a5907d..a867aed71 100644
--- a/examples/experimental/config/eval_config.yaml
+++ b/examples/experimental/config/eval_config.yaml
@@ -4,7 +4,7 @@ test_dataset_size: 10_000 # Number of test scenarios to evaluate on
 # Environment settings
 train_dir: data/processed/training
 test_dir: data/processed/validation
-file_prefix: null
+file_prefix: null # Controls the file prefix used when searching files in SceneDataLoader, defaults to tfrecord (WOMD data)
 
 num_worlds: 50 # Number of parallel environments for evaluation
 max_controlled_agents: 64 # Maximum number of agents controlled by the model.
diff --git a/examples/experimental/config/model_config.yaml b/examples/experimental/config/model_config.yaml
index 1e97a5f7b..0f23a2432 100644
--- a/examples/experimental/config/model_config.yaml
+++ b/examples/experimental/config/model_config.yaml
@@ -1,7 +1,7 @@
 models_path: examples/experimental/models
 
 models:
-  - name: expert_replay
-    train_dataset_size: 1000
+  - name: model_PPO____R_10000__02_27_09_19_10_626_003200
+    train_dataset_size: 10_000
     wandb: null
     trained_on: null
\ No newline at end of file
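The series ends with model_config.yaml pointing back at the trained PPO checkpoint, so expert replay stays opt-in. A minimal sketch of the model entry that turns it on, reusing the values from patch 2 before the revert (load_policy dispatches on the literal name "expert_replay" and returns an ExpertReplayPolicy):

models_path: examples/experimental/models

models:
  - name: expert_replay       # load_policy returns ExpertReplayPolicy for this name
    train_dataset_size: 1000  # value used in patch 2; no training is involved in replay
    wandb: null
    trained_on: null

When evaluating this entry, the environment presumably also needs the continuous-action settings from expert_replay_config.yaml, since the replayed expert actions are passed to step_dynamics without discretization.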