From 115167f5f4f5702f3f12b993393647eb4110762c Mon Sep 17 00:00:00 2001
From: Waynezee <wangxiangzhe@baidu.com>
Date: Fri, 20 Mar 2026 15:10:34 +0800
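Subject: [PATCH] tests: enable delay_scale_loss in pipeline-parallel tests

Set "delay_scale_loss" to True in the pp_configs of the seven
pipeline-parallel multi-card tests touched by this patch.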

---
 tests/multi_card_tests/pipeline_parallel/test_gpt_pp.py      | 1 +
 .../pipeline_parallel/test_gpt_pp_with_moe.py                | 1 +
 .../pipeline_parallel/test_gpt_pp_with_moe_with_mtp.py       | 1 +
 .../pipeline_parallel/test_pp_with_shared_weight.py          | 1 +
 .../test_vpp_balanced_memory_with_shared_weight.py           | 5 ++++-
 .../pipeline_parallel/test_vpp_fthenb_with_shared_weight.py  | 1 +
 .../pipeline_parallel/test_vpp_with_shared_weight.py         | 1 +
 7 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp.py b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp.py
index 49fceaca9..617f4400a 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp.py
@@ -99,6 +99,7 @@ def run_pp(
             "forward_backward_overlap_scheduler": forward_backward_overlap_scheduler,
             "overlap_p2p_comm": True,
             "enable_dynamic_shape": True,
+            "delay_scale_loss": True,
         },
     }
     micro_batch_size = 1
diff --git a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe.py b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe.py
index 843a265b5..0fdfa8206 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe.py
@@ -99,6 +99,7 @@ def run_pp(
             "forward_backward_overlap_scheduler": forward_backward_overlap_scheduler,
             "overlap_p2p_comm": True,
             "enable_dynamic_shape": True,
+            "delay_scale_loss": True,
         },
     }
     micro_batch_size = 1
diff --git a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe_with_mtp.py b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe_with_mtp.py
index 7ac94ff3d..2e5fea79e 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe_with_mtp.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_gpt_pp_with_moe_with_mtp.py
@@ -99,6 +99,7 @@ def run_pp(
             "forward_backward_overlap_scheduler": forward_backward_overlap_scheduler,
             "overlap_p2p_comm": True,
             "enable_dynamic_shape": True,
+            "delay_scale_loss": True,
         },
     }
     micro_batch_size = 1
diff --git a/tests/multi_card_tests/pipeline_parallel/test_pp_with_shared_weight.py b/tests/multi_card_tests/pipeline_parallel/test_pp_with_shared_weight.py
index 808e6ae89..29ab8e61f 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_pp_with_shared_weight.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_pp_with_shared_weight.py
@@ -193,6 +193,7 @@ def setUp(self):
             "micro_batch_size": micro_batch_size,
         }
         strategy.hybrid_configs["pp_configs"].clear_every_step_cache = True
+        strategy.hybrid_configs["pp_configs"].delay_scale_loss = True
         self.strategy = strategy
 
         fleet.init(is_collective=True, strategy=strategy)
diff --git a/tests/multi_card_tests/pipeline_parallel/test_vpp_balanced_memory_with_shared_weight.py b/tests/multi_card_tests/pipeline_parallel/test_vpp_balanced_memory_with_shared_weight.py
index 3ca214e5a..c7142d5ed 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_vpp_balanced_memory_with_shared_weight.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_vpp_balanced_memory_with_shared_weight.py
@@ -76,7 +76,10 @@ def setUp(self):
             "dp_degree": self.data_parallel_size,
             "mp_degree": self.model_parallel_size,
             "pp_degree": self.pipeline_parallel_size,
-            "pp_configs": {"best_unbalanced_scheduler": True},
+            "pp_configs": {
+                "best_unbalanced_scheduler": True,
+                "delay_scale_loss": True,
+            },
         }
         strategy.pipeline_configs = {
             "accumulate_steps": batch_size // micro_batch_size,
diff --git a/tests/multi_card_tests/pipeline_parallel/test_vpp_fthenb_with_shared_weight.py b/tests/multi_card_tests/pipeline_parallel/test_vpp_fthenb_with_shared_weight.py
index 4de041c7d..7d93603e1 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_vpp_fthenb_with_shared_weight.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_vpp_fthenb_with_shared_weight.py
@@ -83,6 +83,7 @@ def setUp(self):
         }
         strategy.hybrid_configs["pp_configs"].sync_moment = True
         strategy.hybrid_configs["pp_configs"].sync_param = True
+        strategy.hybrid_configs["pp_configs"].delay_scale_loss = True
         self.strategy = strategy
         fleet.init(is_collective=True, strategy=strategy)
 
diff --git a/tests/multi_card_tests/pipeline_parallel/test_vpp_with_shared_weight.py b/tests/multi_card_tests/pipeline_parallel/test_vpp_with_shared_weight.py
index 247a43c06..28c329f65 100644
--- a/tests/multi_card_tests/pipeline_parallel/test_vpp_with_shared_weight.py
+++ b/tests/multi_card_tests/pipeline_parallel/test_vpp_with_shared_weight.py
@@ -83,6 +83,7 @@ def setUp(self):
         }
         strategy.hybrid_configs["pp_configs"].sync_moment = True
         strategy.hybrid_configs["pp_configs"].sync_param = True
+        strategy.hybrid_configs["pp_configs"].delay_scale_loss = True
         self.strategy = strategy
 
         fleet.init(is_collective=True, strategy=strategy)