diff --git a/baselines/ppo/config/ppo_base_puffer.yaml b/baselines/ppo/config/ppo_base_puffer.yaml
index 9f985667a..90614954b 100644
--- a/baselines/ppo/config/ppo_base_puffer.yaml
+++ b/baselines/ppo/config/ppo_base_puffer.yaml
@@ -3,105 +3,119 @@
 use_rnn: false
 eval_model_path: null
 baseline: false
 data_dir: data/processed/training
-continue_training: false
-model_cpt: null
-
-environment: # Overrides default environment configs (see pygpudrive/env/config.py)
+continue_training: true
+model_cpt: /home/wbk/gpudrive/runs/PPO__C__S_72__01_20_15_12_22_098/model_PPO__C__S_72__01_20_15_12_22_098_027823.pt
+environment: # Overrides default environment configs (see gpudrive/env/config.py)
   name: "gpudrive"
-  num_worlds: 75 # Number of parallel environments
-  k_unique_scenes: 75 # Number of unique scenes to sample from
-  max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
+  num_worlds: 18 # Number of parallel environments (reduced to lower peak GPU memory during scene resampling)
+  k_unique_scenes: 72 # Number of unique scenes to sample from (reduced to lighten the per-run load)
+  max_controlled_agents: 64 # Maximum number of controlled agents (must match the environment mask dimensions)
   ego_state: true
   road_map_obs: true
   partner_obs: true
   norm_obs: true
-  remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
-  lidar_obs: false # NOTE: Setting this to true currently turns of the other observation types
+  remove_non_vehicles: true # If false, all agent types are included (vehicles, pedestrians, cyclists)
+  lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
   reward_type: "weighted_combination"
-  collision_weight: -0.75
-  off_road_weight: -0.75
-  goal_achieved_weight: 1.0
+  collision_weight: -3.0 # Stronger collision penalty: reduce collisions during turns
+  off_road_weight: -3.0 # Stronger off-road penalty, while still allowing moderate risk-taking
+  goal_achieved_weight: 1.0 # Keep the goal reward dominant so reaching the goal stays more attractive than passive avoidance
+  # Shaping terms to avoid the "move a little, then stop" local optimum (only used with weighted_combination)
+  time_penalty: 0.005 # Increased: stronger pressure to keep making progress
+  idle_speed_threshold: 0.5
+  idle_penalty: 0.02 # Reduced: avoid over-penalizing low speeds
+  # Progress reward: the closer the agent is to its goal, the larger the per-step reward (dense positive signal)
+  progress_reward_weight: 0.1 # Kept small to avoid changing the overall reward scale too much
+  progress_reward_scale: 20.0
   dynamics_model: "classic"
-  collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
+  collision_behavior: "remove" # Options: "remove", "stop", "ignore"
   dist_to_goal_threshold: 2.0
-  polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 is use all closest points, 1 maximum sparsity), needs to be balanced with kMaxAgentMapObservationsCount
-  sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
-  obs_radius: 50.0 # Visibility radius of the agents
+  polyline_reduction_threshold: 0.1 # Polyline sampling rate (0 uses all closest points, 1 is maximum sparsity); balance against kMaxAgentMapObservationsCount
+  sampling_seed: 42 # If set, the sampled scene set is deterministic; if None, it is random
+  obs_radius: 50.0 # Visibility radius of the agents
   action_space_steer_disc: 13
   action_space_accel_disc: 7
-  # Versatile Behavior Diffusion (VBD): This will slow down training
+  # Versatile Behavior Diffusion (VBD): enabling this slows down training
   use_vbd: false
   vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
   init_steps: 11
-  vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
+  vbd_trajectory_weight: 0.1 # Weight of the distance-to-VBD-trajectory term in the reward
   vbd_in_obs: false
 
 wandb:
   entity: ""
   project: "gpudrive"
   group: "test"
-  mode: "online" # Options: online, offline, disabled
+  mode: "online" # Options: "online", "offline", "disabled"
   tags: ["ppo", "ff"]
 
 train:
-  exp_id: PPO # Set dynamically in the script if needed
+  exp_id: PPO # Can be set dynamically in the script if needed
   seed: 42
   cpu_offload: false
-  device: "cuda" # Dynamically set to cuda if available, else cpu
+  device: "cuda" # Uses cuda if available, otherwise cpu
   bptt_horizon: 1
   compile: false
   compile_mode: "reduce-overhead"
"reduce-overhead" - # # # Data sampling # # # - resample_scenes: false - resample_dataset_size: 10_000 # Number of unique scenes to sample from - resample_interval: 2_000_000 + # # # 数据采样 # # # + resample_scenes: false # 开启重采样,提升泛化能力 + resample_dataset_size: 10_000 + resample_interval: 10_000_000 # 50M步训练约5次重采样,平衡稳定性和泛化 sample_with_replacement: true shuffle_dataset: false # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000_000 - batch_size: 131_072 - minibatch_size: 8192 - learning_rate: 3e-4 - anneal_lr: false + total_timesteps: 500_000_000 + batch_size: 18432 + minibatch_size: 3072 + # 适度提高学习率:适应新的奖励函数(碰撞/越界惩罚提高) + learning_rate: 2e-4 # 从1e-4提高到2e-4,加快适应新奖励信号 + anneal_lr: true # 开启学习率衰减:从2e-4开始,随训练逐渐降低,先快速适应后精细调优 gamma: 0.99 gae_lambda: 0.95 - update_epochs: 4 + # 收紧更新:避免策略变化太大 + update_epochs: 3 # 减少更新次数 norm_adv: true - clip_coef: 0.2 - clip_vloss: false + clip_coef: 0.15 # 收紧clip,限制策略变化幅度 + # value 更稳 + clip_vloss: true vf_clip_coef: 0.2 - ent_coef: 0.0001 + # 降低探索:策略已学会走,现在需要更稳定(减少晃动) + ent_coef: 0.0003 # 从 0.001 降低到 0.0003 vf_coef: 0.3 max_grad_norm: 0.5 - target_kl: null + # KL 早停,避免重采样后一次更新过猛导致震荡 + target_kl: 0.02 log_window: 1000 - # # # Network # # # + # # # 网络 # # # network: - input_dim: 64 # Embedding of the input features - hidden_dim: 128 # Latent dimension + input_dim: 64 # 输入特征嵌入维度 + hidden_dim: 128 # 潜在维度 dropout: 0.01 class_name: "NeuralNet" - num_parameters: 0 # Total trainable parameters, to be filled at runtime + num_parameters: 0 # 可训练参数数量(运行时填充) + # 新增:观察融合网络配置 + fusion_type: "attention" # 选项: "simple", "attention", "adaptive" + num_attention_heads: 4 # 注意力头数(仅在fusion_type="attention"时有效) - # # # Checkpointing # # # - checkpoint_interval: 400 # Save policy every k iterations + # # # 检查点保存 # # # + checkpoint_interval: 200 # 每隔 k 次迭代保存一次 checkpoint_path: "./runs" - # # # Rendering # # # - render: false # Determines whether to render the environment (note: will slow down training) - render_3d: true # Render simulator state in 3d or 2d - render_interval: 1 # Render every k iterations - render_k_scenarios: 10 # Number of scenarios to render - render_format: "mp4" # Options: gif, mp4 - render_fps: 15 # Frames per second + # # # 渲染 # # # + render: false # 是否渲染环境(开启会减慢训练) + render_3d: true # 渲染 3D 或 2D + render_interval: 1 # 每隔 k 次迭代渲染 + render_k_scenarios: 0 # 训练期建议为 0,避免额外 IO/不确定性 + render_format: "mp4" # 选项:gif、mp4 + render_fps: 15 # 每秒帧数 zoom_radius: 50 vec: - backend: "native" # Only native is currently supported + backend: "native" # 目前仅支持 native num_workers: 1 env_batch_size: 1 zero_copy: false diff --git a/baselines/ppo/config/ppo_base_sb3.yaml b/baselines/ppo/config/ppo_base_sb3.yaml index d601b6e61..75d00efb2 100644 --- a/baselines/ppo/config/ppo_base_sb3.yaml +++ b/baselines/ppo/config/ppo_base_sb3.yaml @@ -1,5 +1,5 @@ data_dir: "data/processed/examples" -num_worlds: 100 +num_worlds: 30 k_unique_scenes: 4 device: "cuda" # or "cpu" diff --git a/gpudrive/env/config.py b/gpudrive/env/config.py index 4ef009996..bc010d486 100755 --- a/gpudrive/env/config.py +++ b/gpudrive/env/config.py @@ -42,9 +42,9 @@ class EnvConfig: # Set the weights for the reward components # R = a * collided + b * goal_achieved + c * off_road - collision_weight: float = 0.0 + collision_weight: float = -0.5 goal_achieved_weight: float = 1.0 - off_road_weight: float = 0.0 + off_road_weight: float = -0.5 # Road observation algorithm settings road_obs_algorithm: str = "linear" # Algorithm for road observations @@ -101,6 +101,22 @@ class EnvConfig: reward_type: str = 
"sparse_on_goal_achieved" # Alternatively, "weighted_combination", "distance_to_logs", "distance_to_vdb_trajs", "reward_conditioned" + # --- weighted_combination 额外稠密项(用于避免“动几下就停”的局部最优) --- + # 每一步的时间成本(仅在 reward_type == "weighted_combination" 时生效) + # 建议从 0.001~0.005 试起;过大可能导致冒进/碰撞上升 + time_penalty: float = 0.0 + + # 低速/怠速惩罚(仅在 reward_type == "weighted_combination" 时生效) + # 当 speed < idle_speed_threshold 且未完成/未终止时,额外扣 idle_penalty + idle_speed_threshold: float = 0.5 + idle_penalty: float = 0.0 + + # 进度奖励:距离目标越近,每步获得的奖励越高(密集正向信号) + # reward += progress_reward_weight * exp(-dist_to_goal / progress_reward_scale) + # 建议 progress_reward_weight: 0.1~0.3, progress_reward_scale: 15~30 + progress_reward_weight: float = 0.0 # 默认关闭 + progress_reward_scale: float = 20.0 # 距离衰减因子 + condition_mode: str = "random" # Options: "random", "fixed", "preset" # Define upper and lower bounds for reward components if using reward_conditioned diff --git a/gpudrive/env/dataset.py b/gpudrive/env/dataset.py index 3ce244730..a1c821e31 100644 --- a/gpudrive/env/dataset.py +++ b/gpudrive/env/dataset.py @@ -35,6 +35,8 @@ def __post_init__(self): ) # Set the random seed for reproducibility + if self.seed is None: + self.seed = 42 self.random_gen = random.Random(self.seed) # Create the dataset from valid files in the directory @@ -84,8 +86,9 @@ def __len__(self): def __next__(self) -> List[str]: if self.sample_with_replacement: # Ensure deterministic behavior + base_seed = 0 if self.seed is None else self.seed random_gen = random.Random( - self.seed + self.current_index + base_seed + self.current_index ) # Changing the seed per batch # Determine the batch size using the random generator to shuffle the indices diff --git a/gpudrive/env/env_puffer.py b/gpudrive/env/env_puffer.py index 811971cf6..583feab10 100644 --- a/gpudrive/env/env_puffer.py +++ b/gpudrive/env/env_puffer.py @@ -52,7 +52,7 @@ def __init__( off_road_weight=-0.5, goal_achieved_weight=1, dist_to_goal_threshold=2.0, - polyline_reduction_threshold=0.1, + polyline_reduction_threshold=0.1, #折线简化阈值,是一个用于控制道路图观察点采样密度的参数。 remove_non_vehicles=True, obs_radius=50.0, use_vbd=False, @@ -60,16 +60,16 @@ def __init__( vbd_trajectory_weight=0.1, render=False, render_3d=True, - render_interval=50, - render_k_scenarios=3, + render_interval=50, #渲染间隔,每隔多少步渲染一次 + render_k_scenarios=3, #渲染场景数量 render_agent_obs=False, render_format="mp4", render_fps=15, zoom_radius=50, - buf=None, + buf=None, #缓冲区,用于存储环境状态和动作 **kwargs, ): - assert buf is None, "GPUDrive set up only for --vec native" + assert buf is None, "GPUDrive set up only for --vec native" #断言缓冲区为空,表示只支持原生环境 if data_loader is None: data_loader = SceneDataLoader( @@ -78,7 +78,7 @@ def __init__( dataset_size=loader_dataset_size, sample_with_replacement=loader_sample_with_replacement, shuffle=loader_shuffle, - ) + ) #数据加载器,用于加载场景数据 if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" @@ -239,6 +239,13 @@ def step(self, action): Args: action: A numpy array of actions for the controlled agents. Shape: (num_worlds, max_cont_agents_per_env) + 执行一步环境交互: + 1. 应用动作 + 2. 执行物理仿真 + 3. 计算奖励 + 4. 处理终止状态 + 5. 异步重置完成的环境 + 6. 
         """
 
         # Set the action for the controlled agents
diff --git a/gpudrive/env/env_torch.py b/gpudrive/env/env_torch.py
index b8481c911..f6b499012 100755
--- a/gpudrive/env/env_torch.py
+++ b/gpudrive/env/env_torch.py
@@ -491,6 +491,51 @@ def get_rewards(
                 + off_road_weight * off_road
             )
 
+            # Dense shaping: avoid the "stopping is optimal" local optimum
+            # Only applied to agents that are not done and have not reached the goal (no extra penalties after done/goal)
+            needs_shaping = (
+                self.config.time_penalty != 0.0
+                or self.config.idle_penalty != 0.0
+                or self.config.progress_reward_weight != 0.0
+            )
+            if needs_shaping:
+                done = (
+                    self.sim.done_tensor()
+                    .to_torch()
+                    .clone()
+                    .squeeze(dim=2)
+                    .to(weighted_rewards.device)
+                    .to(torch.float)
+                )
+                active = (1.0 - done) * (1.0 - goal_achieved)
+
+                if self.config.time_penalty != 0.0:
+                    weighted_rewards = weighted_rewards - self.config.time_penalty * active
+
+                # Get the speed (used for the idle penalty)
+                if self.config.idle_penalty != 0.0:
+                    speed = (
+                        self.sim.self_observation_tensor()
+                        .to_torch()
+                        .clone()[:, :, 0]
+                        .to(weighted_rewards.device)
+                        .to(torch.float)
+                    )
+                    is_idle = (speed < self.config.idle_speed_threshold).to(torch.float)
+                    weighted_rewards = weighted_rewards - self.config.idle_penalty * is_idle * active
+
+                # Progress reward: the closer to the goal, the larger the per-step positive reward (dense guidance signal)
+                if self.config.progress_reward_weight != 0.0:
+                    self_obs = self.sim.self_observation_tensor().to_torch().clone()
+                    rel_goal_x = self_obs[:, :, 4].to(weighted_rewards.device)
+                    rel_goal_y = self_obs[:, :, 5].to(weighted_rewards.device)
+                    dist_to_goal = torch.sqrt(rel_goal_x ** 2 + rel_goal_y ** 2 + 1e-6)
+                    progress_reward = self.config.progress_reward_weight * torch.exp(
+                        -dist_to_goal / self.config.progress_reward_scale
+                    )
+                    # Only reward agents that are still driving
+                    weighted_rewards = weighted_rewards + progress_reward * active
+
             return weighted_rewards
 
         elif self.config.reward_type == "reward_conditioned":
diff --git a/gpudrive/integrations/puffer/ppo.py b/gpudrive/integrations/puffer/ppo.py
index bdc65ed45..f6e99bd7b 100644
--- a/gpudrive/integrations/puffer/ppo.py
+++ b/gpudrive/integrations/puffer/ppo.py
@@ -239,6 +239,20 @@ def train(data):
         dones_np = experience.dones_np[idxs]
         values_np = experience.values_np[idxs]
         rewards_np = experience.rewards_np[idxs]
+
+        # Numerical stability check on the GAE inputs
+        if np.isnan(dones_np).any() or np.isnan(values_np).any() or np.isnan(rewards_np).any():
+            print("Warning: NaN detected in GAE inputs, replacing with zeros")
+            dones_np = np.nan_to_num(dones_np, nan=0.0)
+            values_np = np.nan_to_num(values_np, nan=0.0)
+            rewards_np = np.nan_to_num(rewards_np, nan=0.0)
+
+        # Also check for Inf values
+        if np.isinf(values_np).any() or np.isinf(rewards_np).any():
+            print("Warning: Inf detected in GAE inputs, clipping values")
+            values_np = np.clip(values_np, -1e6, 1e6)
+            rewards_np = np.clip(rewards_np, -1e6, 1e6)
+
         advantages_np = compute_gae(
             dones_np, values_np, rewards_np, config.gamma, config.gae_lambda
         )
@@ -347,7 +361,12 @@ def train(data):
 
     with profile.train_misc:
         if config.anneal_lr:
-            frac = 1.0 - data.global_step / config.total_timesteps
+            # When resuming training, anneal from the configured learning rate starting at the resume step
+            lr_start_step = getattr(data, 'lr_start_step', 0)
+            lr_total_steps = config.total_timesteps - lr_start_step
+            steps_since_start = data.global_step - lr_start_step
+            frac = 1.0 - steps_since_start / lr_total_steps
+            frac = max(0.0, frac)  # Guard against negative values
             lrnow = float(frac) * float(config.learning_rate)
             data.optimizer.param_groups[0]["lr"] = lrnow
diff --git a/gpudrive/networks/late_fusion.py b/gpudrive/networks/late_fusion.py
index aa14b3e36..c7f1a9292 100644
--- a/gpudrive/networks/late_fusion.py
+++ b/gpudrive/networks/late_fusion.py
@@ -3,7 +3,7 @@
 import torch
 from torch import nn
 from torch.distributions.utils import logits_to_probs
-import pufferlib.models
+import pufferlib.models  # Used here mainly for orthogonal initialization of the network layers
 from gpudrive.env import constants
 from huggingface_hub import PyTorchModelHubMixin
 from box import Box
@@ -12,21 +12,21 @@
 TOP_K_ROAD_POINTS = madrona_gpudrive.kMaxAgentMapObservationsCount
 
-
+# Compute the log-probability of `value` under the given logits
 def log_prob(logits, value):
     value = value.long().unsqueeze(-1)
     value, log_pmf = torch.broadcast_tensors(value, logits)
     value = value[..., :1]
     return log_pmf.gather(-1, value).squeeze(-1)
 
-
+# Compute the entropy of the categorical distribution defined by the logits
 def entropy(logits):
     min_real = torch.finfo(logits.dtype).min
     logits = torch.clamp(logits, min=min_real)
     p_log_p = logits * logits_to_probs(logits)
     return -p_log_p.sum(-1)
 
-
+# Given logits (action scores), return the sampled/selected action, its log-probability, and the entropy
 def sample_logits(
     logits: Union[torch.Tensor, List[torch.Tensor]],
     action=None,
@@ -83,6 +83,8 @@ def __init__(
         max_controlled_agents=64,
         obs_dim=2984,  # Size of the flattened observation vector (hardcoded)
         config=None,  # Optional config
+        fusion_type="attention",  # New: fusion type selection
+        num_attention_heads=4,  # New: number of attention heads
     ):
         super().__init__()
         self.input_dim = input_dim
@@ -94,12 +96,18 @@ def __init__(
         self.num_modes = 3  # Ego, partner, road graph
         self.dropout = dropout
         self.act_func = nn.Tanh() if act_func == "tanh" else nn.GELU()
+        self.fusion_type = fusion_type
+        self.num_attention_heads = num_attention_heads
 
         # Indices for unpacking the observation
         self.ego_state_idx = constants.EGO_FEAT_DIM
         self.partner_obs_idx = (
             constants.PARTNER_FEAT_DIM * self.max_controlled_agents
         )
+
+        # Set default value for vbd_in_obs
+        self.vbd_in_obs = False
+
         if config is not None:
             self.config = Box(config)
             if "reward_type" in self.config:
@@ -109,19 +117,21 @@ def __init__(
                     self.ego_state_idx += 3
                     self.partner_obs_idx += 3
 
-            self.vbd_in_obs = self.config.vbd_in_obs
+            # Override the default if the config contains vbd_in_obs
+            if hasattr(self.config, 'vbd_in_obs'):
+                self.vbd_in_obs = self.config.vbd_in_obs
 
             # Calculate the VBD predictions size: 91 timesteps * 5 features = 455
             self.vbd_size = 91 * 5
 
         self.ego_embed = nn.Sequential(
-            pufferlib.pytorch.layer_init(
+            pufferlib.pytorch.layer_init(  # Initialize the linear layer
                 nn.Linear(self.ego_state_idx, input_dim)
             ),
-            nn.LayerNorm(input_dim),
-            self.act_func,
-            nn.Dropout(self.dropout),
-            pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
+            nn.LayerNorm(input_dim),  # Layer normalization
+            self.act_func,  # Activation function
+            nn.Dropout(self.dropout),  # Dropout to prevent overfitting
+            pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),  # Initialized linear layer
         )
 
         self.partner_embed = nn.Sequential(
@@ -155,8 +165,31 @@ def __init__(
             pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
         )
 
+        # New: attention-based fusion
+        if self.fusion_type == "attention":
+            self.attention_fusion = nn.MultiheadAttention(
+                embed_dim=input_dim,
+                num_heads=self.num_attention_heads,
+                dropout=self.dropout,
+                batch_first=True
+            )
+            self.attention_norm = nn.LayerNorm(input_dim)
+            # Output dimension after attention fusion (flattened to preserve the full information)
+            fusion_output_dim = input_dim * 3
+        elif self.fusion_type == "adaptive":
+            # Adaptive weighted fusion
+            self.adaptive_weights = nn.Sequential(
+                nn.Linear(input_dim * self.num_modes, 64),
+                self.act_func,
+                nn.Linear(64, self.num_modes),
+                nn.Softmax(dim=-1)
+            )
+            fusion_output_dim = input_dim
+        else:  # Original simple concatenation
+            fusion_output_dim = self.input_dim * self.num_modes
+
         self.shared_embed = nn.Sequential(
-            nn.Linear(self.input_dim * self.num_modes, self.hidden_dim),
+            nn.Linear(fusion_output_dim, self.hidden_dim),
             nn.Dropout(self.dropout),
         )
 
@@ -191,11 +224,50 @@ def encode_observations(self, observation):
         partner_embed, _ = self.partner_embed(road_objects).max(dim=1)
         road_map_embed, _ = self.road_map_embed(road_graph).max(dim=1)
 
-        # Concatenate the embeddings
-        embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1)
+        # New: select between different fusion strategies
+        if self.fusion_type == "attention":
+            # Attention fusion
+            embed = self._attention_fusion(ego_embed, partner_embed, road_map_embed)
+        elif self.fusion_type == "adaptive":
+            # Adaptive weighted fusion
+            embed = self._adaptive_fusion(ego_embed, partner_embed, road_map_embed)
+        else:
+            # Original simple concatenation
+            embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1)
 
         return self.shared_embed(embed)
 
+    def _attention_fusion(self, ego_embed, partner_embed, road_embed):
+        """Fuse the modalities with multi-head self-attention."""
+        # Stack all modalities: (batch, 3, input_dim)
+        modalities = torch.stack([ego_embed, partner_embed, road_embed], dim=1)
+
+        # Self-attention fusion
+        attended, attention_weights = self.attention_fusion(
+            modalities, modalities, modalities
+        )
+
+        # Residual connection + layer normalization
+        attended = self.attention_norm(attended + modalities)
+
+        # Flatten to keep the full information instead of average pooling;
+        # this avoids an information bottleneck (192 dims vs. 64 dims) and improves final performance
+        return attended.flatten(start_dim=1)
+
+    def _adaptive_fusion(self, ego_embed, partner_embed, road_embed):
+        """Fuse the modalities with adaptive per-modality weights."""
+        # Concatenate all modality features
+        combined = torch.cat([ego_embed, partner_embed, road_embed], dim=-1)
+
+        # Compute a weight for each modality
+        weights = self.adaptive_weights(combined)
+
+        # Weighted fusion
+        modalities = torch.stack([ego_embed, partner_embed, road_embed], dim=-1)
+        weighted_fusion = (modalities * weights.unsqueeze(1)).sum(dim=-1)
+
+        return weighted_fusion
+
     def forward(self, obs, action=None, deterministic=False):
         # Encode the observations
diff --git a/gpudrive/visualize/core.py b/gpudrive/visualize/core.py
index f961f82a5..6ec9d09b3 100644
--- a/gpudrive/visualize/core.py
+++ b/gpudrive/visualize/core.py
@@ -100,6 +100,7 @@ def plot_simulator_state(
         zoom_radius: int = 100,
         plot_log_replay_trajectory: bool = False,
         agent_positions: Optional[torch.Tensor] = None,
+        predicted_trajectories: Optional[torch.Tensor] = None,
         backward_goals: bool = False,
         policy_masks: Optional[Dict[int,Dict[str,torch.Tensor]]] = None,
     ):
@@ -432,6 +433,15 @@ def plot_simulator_state(
            except Exception as e:
                print(f"Warning: Could not add colorbar: {e}")
 
+            # Plot predicted (future) trajectories
+            if predicted_trajectories is not None:
+                self._plot_predicted_trajectories(
+                    ax=ax,
+                    env_idx=env_idx,
+                    predicted_trajectories=predicted_trajectories,
+                    controlled_live=controlled_live,
+                )
+
             # Determine center point for zooming
             if center_agent_idx is not None:
                 center_x = global_agent_states.pos_x[
@@ -1574,3 +1584,82 @@ def plot_agent_observation(
         ax.set_yticks([])
 
         return fig
+
+    def _plot_predicted_trajectories(
+        self,
+        ax: matplotlib.axes.Axes,
+        env_idx: int,
+        predicted_trajectories: torch.Tensor,
+        controlled_live: torch.Tensor,
+    ) -> None:
+        """
+        Plot the predicted future trajectories.
+
+        Args:
+            ax: Matplotlib axis
+            env_idx: Environment index
+            predicted_trajectories: Predicted trajectories of shape [num_worlds, max_agents, horizon, 2]
+            controlled_live: Mask of shape [max_agents] for controlled, live agents
+        """
+        if predicted_trajectories is None:
+            return
+
+        # Style for predicted trajectories (dashed lines indicate predictions)
+        pred_color = "#FF6B6B"  # Red marks predictions
+        pred_alpha = 0.6
+        pred_linewidth = 2.0
+
+        for agent_idx in range(predicted_trajectories.shape[1]):
+            if controlled_live[agent_idx]:
+                trajectory = predicted_trajectories[env_idx, agent_idx, :, :]  # [horizon, 2]
+
+                # Filter out invalid points
+                valid_mask = (
+                    (trajectory[:, 0] != 0)
+                    & (trajectory[:, 1] != 0)
+                    & (torch.abs(trajectory[:, 0]) < OUT_OF_BOUNDS)
+                    & (torch.abs(trajectory[:, 1]) < OUT_OF_BOUNDS)
+                )
+                valid_trajectory = trajectory[valid_mask]
+
+                if len(valid_trajectory) > 1:
+                    points = valid_trajectory.cpu().numpy()
+
+                    if self.render_3d:
+                        # 3D plot
+                        trajectory_height = 0.1  # Slightly raised to distinguish the predicted trajectory
+                        ax.plot(
+                            points[:, 0],
+                            points[:, 1],
+                            trajectory_height,
+                            color=pred_color,
+                            linestyle="--",
+                            linewidth=pred_linewidth,
+                            alpha=pred_alpha,
+                            zorder=2,
+                            label="Predicted" if agent_idx == 0 else "",
+                        )
+                    else:
+                        # 2D plot
+                        ax.plot(
+                            points[:, 0],
+                            points[:, 1],
+                            color=pred_color,
+                            linestyle="--",
+                            linewidth=pred_linewidth,
+                            alpha=pred_alpha,
+                            zorder=2,
+                            label="Predicted" if agent_idx == 0 else "",
+                        )
+
+                    # Mark the end point of the trajectory
+                    if len(points) > 0:
+                        ax.scatter(
+                            points[-1, 0],
+                            points[-1, 1],
+                            color=pred_color,
+                            marker="x",
+                            s=50,
+                            alpha=pred_alpha,
+                            zorder=3,
+                        )
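
Note on the reward shaping introduced above: the three new EnvConfig fields combine into the per-step reward as base_reward - time_penalty * active - idle_penalty * [speed < idle_speed_threshold] * active + progress_reward_weight * exp(-dist_to_goal / progress_reward_scale) * active, where active masks out agents that are done or have reached their goal. The sketch below reproduces that arithmetic on plain tensors so the shaping can be checked in isolation; the function name shaped_reward and its flattened inputs (speed, dist_to_goal, done, goal_achieved as float tensors) are illustrative assumptions, not part of the patch, which reads these values from the simulator's self_observation_tensor() and done_tensor().

import torch


def shaped_reward(
    base_reward: torch.Tensor,     # weighted_combination reward, shape (num_worlds, max_agents)
    speed: torch.Tensor,           # current speed per agent, same shape
    dist_to_goal: torch.Tensor,    # Euclidean distance to the goal, same shape
    done: torch.Tensor,            # 1.0 where the episode has ended, else 0.0
    goal_achieved: torch.Tensor,   # 1.0 where the goal was reached, else 0.0
    time_penalty: float = 0.005,
    idle_speed_threshold: float = 0.5,
    idle_penalty: float = 0.02,
    progress_reward_weight: float = 0.1,
    progress_reward_scale: float = 20.0,
) -> torch.Tensor:
    """Apply the dense shaping terms from the patch to a base reward tensor (illustrative sketch)."""
    # Shaping only applies to agents that are still driving.
    active = (1.0 - done) * (1.0 - goal_achieved)

    # Constant per-step cost discourages stalling.
    reward = base_reward - time_penalty * active

    # Extra cost while nearly stationary, so "stop and wait" is not a local optimum.
    is_idle = (speed < idle_speed_threshold).float()
    reward = reward - idle_penalty * is_idle * active

    # Dense positive signal that grows as the agent approaches its goal.
    progress = progress_reward_weight * torch.exp(-dist_to_goal / progress_reward_scale)
    return reward + progress * active


if __name__ == "__main__":
    # Toy check: agent 0 idles far from its goal, agent 1 moves and is close to it.
    base = torch.zeros(1, 2)
    speed = torch.tensor([[0.1, 5.0]])
    dist = torch.tensor([[40.0, 3.0]])
    done = torch.zeros(1, 2)
    goal = torch.zeros(1, 2)
    print(shaped_reward(base, speed, dist, done, goal))
    # Agent 1 receives the larger shaped reward, as intended.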