diff --git a/baselines/ppo/config/ppo_base_puffer.yaml b/baselines/ppo/config/ppo_base_puffer.yaml
index 9f985667a..90614954b 100644
--- a/baselines/ppo/config/ppo_base_puffer.yaml
+++ b/baselines/ppo/config/ppo_base_puffer.yaml
@@ -3,105 +3,119 @@
 use_rnn: false
 eval_model_path: null
 baseline: false
 data_dir: data/processed/training
-continue_training: false
-model_cpt: null
-
-environment: # Overrides default environment configs (see pygpudrive/env/config.py)
+continue_training: true
+model_cpt: /home/wbk/gpudrive/runs/PPO__C__S_72__01_20_15_12_22_098/model_PPO__C__S_72__01_20_15_12_22_098_027823.pt
+environment: # Overrides default environment configs (see gpudrive/env/config.py)
   name: "gpudrive"
-  num_worlds: 75 # Number of parallel environments
-  k_unique_scenes: 75 # Number of unique scenes to sample from
-  max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
+  num_worlds: 18 # Number of parallel environments (reduced to lower peak GPU memory during scene resampling)
+  k_unique_scenes: 72 # Number of unique scenes to sample from (reduced to lighten the per-run load)
+  max_controlled_agents: 64 # Maximum number of controlled agents (must match the environment mask dimensions)
   ego_state: true
   road_map_obs: true
   partner_obs: true
   norm_obs: true
-  remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
-  lidar_obs: false # NOTE: Setting this to true currently turns of the other observation types
+  remove_non_vehicles: true # If false, all agent types are included (vehicles, pedestrians, cyclists)
+  lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
   reward_type: "weighted_combination"
-  collision_weight: -0.75
-  off_road_weight: -0.75
-  goal_achieved_weight: 1.0
+  collision_weight: -3.0 # Stronger collision penalty: reduce collisions during turns
+  off_road_weight: -3.0 # Stronger off-road penalty, while still allowing moderate risk-taking
+  goal_achieved_weight: 1.0 # Keep the goal reward dominant so reaching the goal stays more attractive than passive avoidance
+  # Shaping terms to avoid the "move a little, then stop" local optimum (only used with weighted_combination)
+  time_penalty: 0.005 # Increased: stronger pressure to keep making progress
+  idle_speed_threshold: 0.5
+  idle_penalty: 0.02 # Reduced: avoid over-penalizing low speeds
+  # Progress reward: the closer the agent is to its goal, the larger the per-step reward (dense positive signal)
+  progress_reward_weight: 0.1 # Kept small to avoid changing the overall reward scale too much
+  progress_reward_scale: 20.0
   dynamics_model: "classic"
-  collision_behavior: "ignore" # Options: "remove", "stop", "ignore"
+  collision_behavior: "remove" # Options: "remove", "stop", "ignore"
   dist_to_goal_threshold: 2.0
-  polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 is use all closest points, 1 maximum sparsity), needs to be balanced with kMaxAgentMapObservationsCount
-  sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
-  obs_radius: 50.0 # Visibility radius of the agents
+  polyline_reduction_threshold: 0.1 # Polyline sampling rate (0 uses all closest points, 1 is maximum sparsity); balance against kMaxAgentMapObservationsCount
+  sampling_seed: 42 # If set, the sampled scene set is deterministic; if None, it is random
+  obs_radius: 50.0 # Visibility radius of the agents
   action_space_steer_disc: 13
   action_space_accel_disc: 7
-  # Versatile Behavior Diffusion (VBD): This will slow down training
+  # Versatile Behavior Diffusion (VBD): enabling this slows down training
   use_vbd: false
   vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
   init_steps: 11
-  vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
+  vbd_trajectory_weight: 0.1 # Weight of the distance-to-VBD-trajectory term in the reward
   vbd_in_obs: false
 
 wandb:
   entity: ""
   project: "gpudrive"
   group: "test"
-  mode: "online" # Options: online, offline, disabled
+  mode: "online" # Options: "online", "offline", "disabled"
   tags: ["ppo", "ff"]
 
 train:
-  exp_id: PPO # Set dynamically in the script if needed
+  exp_id: PPO # Can be set dynamically in the script if needed
   seed: 42
   cpu_offload: false
-  device: "cuda" # Dynamically set to cuda if available, else cpu
+  device: "cuda" # Uses cuda if available, otherwise cpu
   bptt_horizon: 1
   compile: false
   compile_mode: "reduce-overhead"
"reduce-overhead" - # # # Data sampling # # # - resample_scenes: false - resample_dataset_size: 10_000 # Number of unique scenes to sample from - resample_interval: 2_000_000 + # # # 数据采样 # # # + resample_scenes: false # 开启重采样,提升泛化能力 + resample_dataset_size: 10_000 + resample_interval: 10_000_000 # 50M步训练约5次重采样,平衡稳定性和泛化 sample_with_replacement: true shuffle_dataset: false # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000_000 - batch_size: 131_072 - minibatch_size: 8192 - learning_rate: 3e-4 - anneal_lr: false + total_timesteps: 500_000_000 + batch_size: 18432 + minibatch_size: 3072 + # 适度提高学习率:适应新的奖励函数(碰撞/越界惩罚提高) + learning_rate: 2e-4 # 从1e-4提高到2e-4,加快适应新奖励信号 + anneal_lr: true # 开启学习率衰减:从2e-4开始,随训练逐渐降低,先快速适应后精细调优 gamma: 0.99 gae_lambda: 0.95 - update_epochs: 4 + # 收紧更新:避免策略变化太大 + update_epochs: 3 # 减少更新次数 norm_adv: true - clip_coef: 0.2 - clip_vloss: false + clip_coef: 0.15 # 收紧clip,限制策略变化幅度 + # value 更稳 + clip_vloss: true vf_clip_coef: 0.2 - ent_coef: 0.0001 + # 降低探索:策略已学会走,现在需要更稳定(减少晃动) + ent_coef: 0.0003 # 从 0.001 降低到 0.0003 vf_coef: 0.3 max_grad_norm: 0.5 - target_kl: null + # KL 早停,避免重采样后一次更新过猛导致震荡 + target_kl: 0.02 log_window: 1000 - # # # Network # # # + # # # 网络 # # # network: - input_dim: 64 # Embedding of the input features - hidden_dim: 128 # Latent dimension + input_dim: 64 # 输入特征嵌入维度 + hidden_dim: 128 # 潜在维度 dropout: 0.01 class_name: "NeuralNet" - num_parameters: 0 # Total trainable parameters, to be filled at runtime + num_parameters: 0 # 可训练参数数量(运行时填充) + # 新增:观察融合网络配置 + fusion_type: "attention" # 选项: "simple", "attention", "adaptive" + num_attention_heads: 4 # 注意力头数(仅在fusion_type="attention"时有效) - # # # Checkpointing # # # - checkpoint_interval: 400 # Save policy every k iterations + # # # 检查点保存 # # # + checkpoint_interval: 200 # 每隔 k 次迭代保存一次 checkpoint_path: "./runs" - # # # Rendering # # # - render: false # Determines whether to render the environment (note: will slow down training) - render_3d: true # Render simulator state in 3d or 2d - render_interval: 1 # Render every k iterations - render_k_scenarios: 10 # Number of scenarios to render - render_format: "mp4" # Options: gif, mp4 - render_fps: 15 # Frames per second + # # # 渲染 # # # + render: false # 是否渲染环境(开启会减慢训练) + render_3d: true # 渲染 3D 或 2D + render_interval: 1 # 每隔 k 次迭代渲染 + render_k_scenarios: 0 # 训练期建议为 0,避免额外 IO/不确定性 + render_format: "mp4" # 选项:gif、mp4 + render_fps: 15 # 每秒帧数 zoom_radius: 50 vec: - backend: "native" # Only native is currently supported + backend: "native" # 目前仅支持 native num_workers: 1 env_batch_size: 1 zero_copy: false diff --git a/baselines/ppo/config/ppo_base_sb3.yaml b/baselines/ppo/config/ppo_base_sb3.yaml index d601b6e61..75d00efb2 100644 --- a/baselines/ppo/config/ppo_base_sb3.yaml +++ b/baselines/ppo/config/ppo_base_sb3.yaml @@ -1,5 +1,5 @@ data_dir: "data/processed/examples" -num_worlds: 100 +num_worlds: 30 k_unique_scenes: 4 device: "cuda" # or "cpu" diff --git a/gpudrive/env/config.py b/gpudrive/env/config.py index 4ef009996..bc010d486 100755 --- a/gpudrive/env/config.py +++ b/gpudrive/env/config.py @@ -42,9 +42,9 @@ class EnvConfig: # Set the weights for the reward components # R = a * collided + b * goal_achieved + c * off_road - collision_weight: float = 0.0 + collision_weight: float = -0.5 goal_achieved_weight: float = 1.0 - off_road_weight: float = 0.0 + off_road_weight: float = -0.5 # Road observation algorithm settings road_obs_algorithm: str = "linear" # Algorithm for road observations @@ -101,6 +101,22 @@ class EnvConfig: reward_type: str = 
"sparse_on_goal_achieved" # Alternatively, "weighted_combination", "distance_to_logs", "distance_to_vdb_trajs", "reward_conditioned" + # --- weighted_combination 额外稠密项(用于避免“动几下就停”的局部最优) --- + # 每一步的时间成本(仅在 reward_type == "weighted_combination" 时生效) + # 建议从 0.001~0.005 试起;过大可能导致冒进/碰撞上升 + time_penalty: float = 0.0 + + # 低速/怠速惩罚(仅在 reward_type == "weighted_combination" 时生效) + # 当 speed < idle_speed_threshold 且未完成/未终止时,额外扣 idle_penalty + idle_speed_threshold: float = 0.5 + idle_penalty: float = 0.0 + + # 进度奖励:距离目标越近,每步获得的奖励越高(密集正向信号) + # reward += progress_reward_weight * exp(-dist_to_goal / progress_reward_scale) + # 建议 progress_reward_weight: 0.1~0.3, progress_reward_scale: 15~30 + progress_reward_weight: float = 0.0 # 默认关闭 + progress_reward_scale: float = 20.0 # 距离衰减因子 + condition_mode: str = "random" # Options: "random", "fixed", "preset" # Define upper and lower bounds for reward components if using reward_conditioned diff --git a/gpudrive/env/dataset.py b/gpudrive/env/dataset.py index 3ce244730..a1c821e31 100644 --- a/gpudrive/env/dataset.py +++ b/gpudrive/env/dataset.py @@ -35,6 +35,8 @@ def __post_init__(self): ) # Set the random seed for reproducibility + if self.seed is None: + self.seed = 42 self.random_gen = random.Random(self.seed) # Create the dataset from valid files in the directory @@ -84,8 +86,9 @@ def __len__(self): def __next__(self) -> List[str]: if self.sample_with_replacement: # Ensure deterministic behavior + base_seed = 0 if self.seed is None else self.seed random_gen = random.Random( - self.seed + self.current_index + base_seed + self.current_index ) # Changing the seed per batch # Determine the batch size using the random generator to shuffle the indices diff --git a/gpudrive/env/env_puffer.py b/gpudrive/env/env_puffer.py index 811971cf6..583feab10 100644 --- a/gpudrive/env/env_puffer.py +++ b/gpudrive/env/env_puffer.py @@ -52,7 +52,7 @@ def __init__( off_road_weight=-0.5, goal_achieved_weight=1, dist_to_goal_threshold=2.0, - polyline_reduction_threshold=0.1, + polyline_reduction_threshold=0.1, #折线简化阈值,是一个用于控制道路图观察点采样密度的参数。 remove_non_vehicles=True, obs_radius=50.0, use_vbd=False, @@ -60,16 +60,16 @@ def __init__( vbd_trajectory_weight=0.1, render=False, render_3d=True, - render_interval=50, - render_k_scenarios=3, + render_interval=50, #渲染间隔,每隔多少步渲染一次 + render_k_scenarios=3, #渲染场景数量 render_agent_obs=False, render_format="mp4", render_fps=15, zoom_radius=50, - buf=None, + buf=None, #缓冲区,用于存储环境状态和动作 **kwargs, ): - assert buf is None, "GPUDrive set up only for --vec native" + assert buf is None, "GPUDrive set up only for --vec native" #断言缓冲区为空,表示只支持原生环境 if data_loader is None: data_loader = SceneDataLoader( @@ -78,7 +78,7 @@ def __init__( dataset_size=loader_dataset_size, sample_with_replacement=loader_sample_with_replacement, shuffle=loader_shuffle, - ) + ) #数据加载器,用于加载场景数据 if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" @@ -239,6 +239,13 @@ def step(self, action): Args: action: A numpy array of actions for the controlled agents. Shape: (num_worlds, max_cont_agents_per_env) + 执行一步环境交互: + 1. 应用动作 + 2. 执行物理仿真 + 3. 计算奖励 + 4. 处理终止状态 + 5. 异步重置完成的环境 + 6. 
         """
 
         # Set the action for the controlled agents
diff --git a/gpudrive/env/env_torch.py b/gpudrive/env/env_torch.py
index b8481c911..f6b499012 100755
--- a/gpudrive/env/env_torch.py
+++ b/gpudrive/env/env_torch.py
@@ -491,6 +491,51 @@ def get_rewards(
                 + off_road_weight * off_road
             )
 
+            # Dense shaping: avoid the "stopping is optimal" local optimum
+            # Only applied to agents that are not done and have not reached the goal (no extra penalties after done/goal)
+            needs_shaping = (
+                self.config.time_penalty != 0.0
+                or self.config.idle_penalty != 0.0
+                or self.config.progress_reward_weight != 0.0
+            )
+            if needs_shaping:
+                done = (
+                    self.sim.done_tensor()
+                    .to_torch()
+                    .clone()
+                    .squeeze(dim=2)
+                    .to(weighted_rewards.device)
+                    .to(torch.float)
+                )
+                active = (1.0 - done) * (1.0 - goal_achieved)
+
+                if self.config.time_penalty != 0.0:
+                    weighted_rewards = weighted_rewards - self.config.time_penalty * active
+
+                # Get the speed (used for the idle penalty)
+                if self.config.idle_penalty != 0.0:
+                    speed = (
+                        self.sim.self_observation_tensor()
+                        .to_torch()
+                        .clone()[:, :, 0]
+                        .to(weighted_rewards.device)
+                        .to(torch.float)
+                    )
+                    is_idle = (speed < self.config.idle_speed_threshold).to(torch.float)
+                    weighted_rewards = weighted_rewards - self.config.idle_penalty * is_idle * active
+
+                # Progress reward: the closer to the goal, the larger the per-step positive reward (dense guidance signal)
+                if self.config.progress_reward_weight != 0.0:
+                    self_obs = self.sim.self_observation_tensor().to_torch().clone()
+                    rel_goal_x = self_obs[:, :, 4].to(weighted_rewards.device)
+                    rel_goal_y = self_obs[:, :, 5].to(weighted_rewards.device)
+                    dist_to_goal = torch.sqrt(rel_goal_x ** 2 + rel_goal_y ** 2 + 1e-6)
+                    progress_reward = self.config.progress_reward_weight * torch.exp(
+                        -dist_to_goal / self.config.progress_reward_scale
+                    )
+                    # Only reward agents that are still driving
+                    weighted_rewards = weighted_rewards + progress_reward * active
+
             return weighted_rewards
 
         elif self.config.reward_type == "reward_conditioned":
diff --git a/gpudrive/integrations/puffer/ppo.py b/gpudrive/integrations/puffer/ppo.py
index bdc65ed45..f6e99bd7b 100644
--- a/gpudrive/integrations/puffer/ppo.py
+++ b/gpudrive/integrations/puffer/ppo.py
@@ -239,6 +239,20 @@ def train(data):
         dones_np = experience.dones_np[idxs]
         values_np = experience.values_np[idxs]
         rewards_np = experience.rewards_np[idxs]
+
+        # Numerical stability check on the GAE inputs
+        if np.isnan(dones_np).any() or np.isnan(values_np).any() or np.isnan(rewards_np).any():
+            print("Warning: NaN detected in GAE inputs, replacing with zeros")
+            dones_np = np.nan_to_num(dones_np, nan=0.0)
+            values_np = np.nan_to_num(values_np, nan=0.0)
+            rewards_np = np.nan_to_num(rewards_np, nan=0.0)
+
+        # Also check for Inf values
+        if np.isinf(values_np).any() or np.isinf(rewards_np).any():
+            print("Warning: Inf detected in GAE inputs, clipping values")
+            values_np = np.clip(values_np, -1e6, 1e6)
+            rewards_np = np.clip(rewards_np, -1e6, 1e6)
+
         advantages_np = compute_gae(
             dones_np, values_np, rewards_np, config.gamma, config.gae_lambda
         )
@@ -347,7 +361,12 @@ def train(data):
 
     with profile.train_misc:
         if config.anneal_lr:
-            frac = 1.0 - data.global_step / config.total_timesteps
+            # When resuming training, anneal from the configured learning rate starting at the resume step
+            lr_start_step = getattr(data, 'lr_start_step', 0)
+            lr_total_steps = config.total_timesteps - lr_start_step
+            steps_since_start = data.global_step - lr_start_step
+            frac = 1.0 - steps_since_start / lr_total_steps
+            frac = max(0.0, frac)  # Guard against negative values
             lrnow = float(frac) * float(config.learning_rate)
             data.optimizer.param_groups[0]["lr"] = lrnow
diff --git a/gpudrive/networks/late_fusion.py b/gpudrive/networks/late_fusion.py
index aa14b3e36..c7f1a9292 100644
--- a/gpudrive/networks/late_fusion.py
+++ b/gpudrive/networks/late_fusion.py
@@ -3,7 +3,7 @@
 import torch
 from torch import nn
 from torch.distributions.utils import logits_to_probs
-import pufferlib.models
+import pufferlib.models  # Used here mainly for orthogonal initialization of the network layers
 from gpudrive.env import constants
 from huggingface_hub import PyTorchModelHubMixin
 from box import Box
@@ -12,21 +12,21 @@
 TOP_K_ROAD_POINTS = madrona_gpudrive.kMaxAgentMapObservationsCount
 
-
+# Compute the log-probability of `value` under the given logits
 def log_prob(logits, value):
     value = value.long().unsqueeze(-1)
     value, log_pmf = torch.broadcast_tensors(value, logits)
     value = value[..., :1]
     return log_pmf.gather(-1, value).squeeze(-1)
 
-
+# Compute the entropy of the categorical distribution defined by the logits
 def entropy(logits):
     min_real = torch.finfo(logits.dtype).min
     logits = torch.clamp(logits, min=min_real)
     p_log_p = logits * logits_to_probs(logits)
     return -p_log_p.sum(-1)
 
-
+# Given logits (action scores), return the sampled/selected action, its log-probability, and the entropy
 def sample_logits(
     logits: Union[torch.Tensor, List[torch.Tensor]],
     action=None,
@@ -83,6 +83,8 @@ def __init__(
         max_controlled_agents=64,
         obs_dim=2984,  # Size of the flattened observation vector (hardcoded)
         config=None,  # Optional config
+        fusion_type="attention",  # New: fusion type selection
+        num_attention_heads=4,  # New: number of attention heads
     ):
         super().__init__()
         self.input_dim = input_dim
@@ -94,12 +96,18 @@ def __init__(
         self.num_modes = 3  # Ego, partner, road graph
         self.dropout = dropout
         self.act_func = nn.Tanh() if act_func == "tanh" else nn.GELU()
+        self.fusion_type = fusion_type
+        self.num_attention_heads = num_attention_heads
 
         # Indices for unpacking the observation
         self.ego_state_idx = constants.EGO_FEAT_DIM
         self.partner_obs_idx = (
             constants.PARTNER_FEAT_DIM * self.max_controlled_agents
         )
+
+        # Set default value for vbd_in_obs
+        self.vbd_in_obs = False
+
         if config is not None:
             self.config = Box(config)
             if "reward_type" in self.config:
@@ -109,19 +117,21 @@ def __init__(
                     self.ego_state_idx += 3
                     self.partner_obs_idx += 3
 
-            self.vbd_in_obs = self.config.vbd_in_obs
+            # Override the default if the config contains vbd_in_obs
+            if hasattr(self.config, 'vbd_in_obs'):
+                self.vbd_in_obs = self.config.vbd_in_obs
 
             # Calculate the VBD predictions size: 91 timesteps * 5 features = 455
             self.vbd_size = 91 * 5
 
         self.ego_embed = nn.Sequential(
-            pufferlib.pytorch.layer_init(
+            pufferlib.pytorch.layer_init(  # Initialize the linear layer
                 nn.Linear(self.ego_state_idx, input_dim)
             ),
-            nn.LayerNorm(input_dim),
-            self.act_func,
-            nn.Dropout(self.dropout),
-            pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
+            nn.LayerNorm(input_dim),  # Layer normalization
+            self.act_func,  # Activation function
+            nn.Dropout(self.dropout),  # Dropout to prevent overfitting
+            pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),  # Initialized linear layer
         )
 
         self.partner_embed = nn.Sequential(
@@ -155,8 +165,31 @@ def __init__(
             pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
         )
 
+        # New: attention-based fusion
+        if self.fusion_type == "attention":
+            self.attention_fusion = nn.MultiheadAttention(
+                embed_dim=input_dim,
+                num_heads=self.num_attention_heads,
+                dropout=self.dropout,
+                batch_first=True
+            )
+            self.attention_norm = nn.LayerNorm(input_dim)
+            # Output dimension after attention fusion (flattened to preserve the full information)
+            fusion_output_dim = input_dim * 3
+        elif self.fusion_type == "adaptive":
+            # Adaptive weighted fusion
+            self.adaptive_weights = nn.Sequential(
+                nn.Linear(input_dim * self.num_modes, 64),
+                self.act_func,
+                nn.Linear(64, self.num_modes),
+                nn.Softmax(dim=-1)
+            )
+            fusion_output_dim = input_dim
+        else:  # Original simple concatenation
+            fusion_output_dim = self.input_dim * self.num_modes
+
         self.shared_embed = nn.Sequential(
-            nn.Linear(self.input_dim * self.num_modes, self.hidden_dim),
+            nn.Linear(fusion_output_dim, self.hidden_dim),
             nn.Dropout(self.dropout),
         )
 
@@ -191,11 +224,50 @@ def encode_observations(self, observation):
         partner_embed, _ = self.partner_embed(road_objects).max(dim=1)
         road_map_embed, _ = self.road_map_embed(road_graph).max(dim=1)
 
-        # Concatenate the embeddings
-        embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1)
+        # New: select between different fusion strategies
+        if self.fusion_type == "attention":
+            # Attention fusion
+            embed = self._attention_fusion(ego_embed, partner_embed, road_map_embed)
+        elif self.fusion_type == "adaptive":
+            # Adaptive weighted fusion
+            embed = self._adaptive_fusion(ego_embed, partner_embed, road_map_embed)
+        else:
+            # Original simple concatenation
+            embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1)
 
         return self.shared_embed(embed)
 
+    def _attention_fusion(self, ego_embed, partner_embed, road_embed):
+        """Fuse the modalities with multi-head self-attention."""
+        # Stack all modalities: (batch, 3, input_dim)
+        modalities = torch.stack([ego_embed, partner_embed, road_embed], dim=1)
+
+        # Self-attention fusion
+        attended, attention_weights = self.attention_fusion(
+            modalities, modalities, modalities
+        )
+
+        # Residual connection + layer normalization
+        attended = self.attention_norm(attended + modalities)
+
+        # Flatten to keep the full information instead of average pooling;
+        # this avoids an information bottleneck (192 dims vs. 64 dims) and improves final performance
+        return attended.flatten(start_dim=1)
+
+    def _adaptive_fusion(self, ego_embed, partner_embed, road_embed):
+        """Fuse the modalities with adaptive per-modality weights."""
+        # Concatenate all modality features
+        combined = torch.cat([ego_embed, partner_embed, road_embed], dim=-1)
+
+        # Compute a weight for each modality
+        weights = self.adaptive_weights(combined)
+
+        # Weighted fusion
+        modalities = torch.stack([ego_embed, partner_embed, road_embed], dim=-1)
+        weighted_fusion = (modalities * weights.unsqueeze(1)).sum(dim=-1)
+
+        return weighted_fusion
+
     def forward(self, obs, action=None, deterministic=False):
         # Encode the observations
diff --git a/gpudrive/visualize/core.py b/gpudrive/visualize/core.py
index f961f82a5..6ec9d09b3 100644
--- a/gpudrive/visualize/core.py
+++ b/gpudrive/visualize/core.py
@@ -100,6 +100,7 @@ def plot_simulator_state(
         zoom_radius: int = 100,
         plot_log_replay_trajectory: bool = False,
         agent_positions: Optional[torch.Tensor] = None,
+        predicted_trajectories: Optional[torch.Tensor] = None,
         backward_goals: bool = False,
         policy_masks: Optional[Dict[int,Dict[str,torch.Tensor]]] = None,
     ):
@@ -432,6 +433,15 @@ def plot_simulator_state(
            except Exception as e:
                print(f"Warning: Could not add colorbar: {e}")
 
+            # Plot predicted (future) trajectories
+            if predicted_trajectories is not None:
+                self._plot_predicted_trajectories(
+                    ax=ax,
+                    env_idx=env_idx,
+                    predicted_trajectories=predicted_trajectories,
+                    controlled_live=controlled_live,
+                )
+
             # Determine center point for zooming
             if center_agent_idx is not None:
                 center_x = global_agent_states.pos_x[
@@ -1574,3 +1584,82 @@ def plot_agent_observation(
         ax.set_yticks([])
 
         return fig
+
+    def _plot_predicted_trajectories(
+        self,
+        ax: matplotlib.axes.Axes,
+        env_idx: int,
+        predicted_trajectories: torch.Tensor,
+        controlled_live: torch.Tensor,
+    ) -> None:
+        """
+        Plot the predicted future trajectories.
+
+        Args:
+            ax: Matplotlib axis
+            env_idx: Environment index
+            predicted_trajectories: Predicted trajectories of shape [num_worlds, max_agents, horizon, 2]
+            controlled_live: Mask of shape [max_agents] for controlled, live agents
+        """
+        if predicted_trajectories is None:
+            return
+
+        # Style for predicted trajectories (dashed lines indicate predictions)
+        pred_color = "#FF6B6B"  # Red marks predictions
+        pred_alpha = 0.6
+        pred_linewidth = 2.0
+
+        for agent_idx in range(predicted_trajectories.shape[1]):
+            if controlled_live[agent_idx]:
+                trajectory = predicted_trajectories[env_idx, agent_idx, :, :]  # [horizon, 2]
+
+                # Filter out invalid points
+                valid_mask = (
+                    (trajectory[:, 0] != 0)
+                    & (trajectory[:, 1] != 0)
+                    & (torch.abs(trajectory[:, 0]) < OUT_OF_BOUNDS)
+                    & (torch.abs(trajectory[:, 1]) < OUT_OF_BOUNDS)
+                )
+                valid_trajectory = trajectory[valid_mask]
+
+                if len(valid_trajectory) > 1:
+                    points = valid_trajectory.cpu().numpy()
+
+                    if self.render_3d:
+                        # 3D plot
+                        trajectory_height = 0.1  # Slightly raised to distinguish the predicted trajectory
+                        ax.plot(
+                            points[:, 0],
+                            points[:, 1],
+                            trajectory_height,
+                            color=pred_color,
+                            linestyle="--",
+                            linewidth=pred_linewidth,
+                            alpha=pred_alpha,
+                            zorder=2,
+                            label="Predicted" if agent_idx == 0 else "",
+                        )
+                    else:
+                        # 2D plot
+                        ax.plot(
+                            points[:, 0],
+                            points[:, 1],
+                            color=pred_color,
+                            linestyle="--",
+                            linewidth=pred_linewidth,
+                            alpha=pred_alpha,
+                            zorder=2,
+                            label="Predicted" if agent_idx == 0 else "",
+                        )
+
+                    # Mark the end point of the trajectory
+                    if len(points) > 0:
+                        ax.scatter(
+                            points[-1, 0],
+                            points[-1, 1],
+                            color=pred_color,
+                            marker="x",
+                            s=50,
+                            alpha=pred_alpha,
+                            zorder=3,
+                        )
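
Note on the reward shaping introduced above: the three new EnvConfig fields combine into the per-step reward as base_reward - time_penalty * active - idle_penalty * [speed < idle_speed_threshold] * active + progress_reward_weight * exp(-dist_to_goal / progress_reward_scale) * active, where active masks out agents that are done or have reached their goal. The sketch below reproduces that arithmetic on plain tensors so the shaping can be checked in isolation; the function name shaped_reward and its flattened inputs (speed, dist_to_goal, done, goal_achieved as float tensors) are illustrative assumptions, not part of the patch, which reads these values from the simulator's self_observation_tensor() and done_tensor().

import torch


def shaped_reward(
    base_reward: torch.Tensor,     # weighted_combination reward, shape (num_worlds, max_agents)
    speed: torch.Tensor,           # current speed per agent, same shape
    dist_to_goal: torch.Tensor,    # Euclidean distance to the goal, same shape
    done: torch.Tensor,            # 1.0 where the episode has ended, else 0.0
    goal_achieved: torch.Tensor,   # 1.0 where the goal was reached, else 0.0
    time_penalty: float = 0.005,
    idle_speed_threshold: float = 0.5,
    idle_penalty: float = 0.02,
    progress_reward_weight: float = 0.1,
    progress_reward_scale: float = 20.0,
) -> torch.Tensor:
    """Apply the dense shaping terms from the patch to a base reward tensor (illustrative sketch)."""
    # Shaping only applies to agents that are still driving.
    active = (1.0 - done) * (1.0 - goal_achieved)

    # Constant per-step cost discourages stalling.
    reward = base_reward - time_penalty * active

    # Extra cost while nearly stationary, so "stop and wait" is not a local optimum.
    is_idle = (speed < idle_speed_threshold).float()
    reward = reward - idle_penalty * is_idle * active

    # Dense positive signal that grows as the agent approaches its goal.
    progress = progress_reward_weight * torch.exp(-dist_to_goal / progress_reward_scale)
    return reward + progress * active


if __name__ == "__main__":
    # Toy check: agent 0 idles far from its goal, agent 1 moves and is close to it.
    base = torch.zeros(1, 2)
    speed = torch.tensor([[0.1, 5.0]])
    dist = torch.tensor([[40.0, 3.0]])
    done = torch.zeros(1, 2)
    goal = torch.zeros(1, 2)
    print(shaped_reward(base, speed, dist, done, goal))
    # Agent 1 receives the larger shaped reward, as intended.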