From ebe72d4296f5944ccd3337b0c24e303b0dba574d Mon Sep 17 00:00:00 2001 From: ArthurinRUC Date: Wed, 31 Dec 2025 15:16:11 +0800 Subject: [PATCH 1/2] test: testing ci usability --- .../warp_specialization_pipeline.py | 2 +- .../warp_specialization_pipeline_api.cu | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/14-warp-specialization/warp_specialization_pipeline.py b/14-warp-specialization/warp_specialization_pipeline.py index 4887ac4..fd4c824 100644 --- a/14-warp-specialization/warp_specialization_pipeline.py +++ b/14-warp-specialization/warp_specialization_pipeline.py @@ -63,7 +63,7 @@ def relative_error(target: torch.Tensor, ref: torch.Tensor, eps: float = 1e-8): num_failed = 0 -def compare_matrix(kernel_output: torch.Tensor, torch_output: torch.Tensor): +def compare_matrix(kernel_output: torch.Tensor, torch_output: torch.Tensor): kernel_output = kernel_output.float() torch_output = torch_output.float() diff --git a/14-warp-specialization/warp_specialization_pipeline_api.cu b/14-warp-specialization/warp_specialization_pipeline_api.cu index 3c13cdb..d55a11b 100644 --- a/14-warp-specialization/warp_specialization_pipeline_api.cu +++ b/14-warp-specialization/warp_specialization_pipeline_api.cu @@ -306,13 +306,13 @@ __global__ __launch_bounds__(Spec::kThreadNum) void warp_specialization(__grid_c Tensor tCsC_s2r = s2r_thr_copy_c.partition_S(sC); // (CPY, CPY_M, CPY_K) Tensor tCrC_s2r = s2r_thr_copy_c.retile_D(tCrC_load); // (CPY, CPY_M, CPY_K) - if (consumer_tid == 0) { - initialize_barrier(tma_load_c_mbarrier, /* arrival thread count */ 1); - cutlass::arch::fence_view_async_shared(); + if (consumer_tid == 0) { + initialize_barrier(tma_load_c_mbarrier, /* arrival thread count */ 1); + cutlass::arch::fence_view_async_shared(); - copy(tma_C.with(tma_load_c_mbarrier), tCgC, tCsC); - set_barrier_transaction_bytes(tma_load_c_mbarrier, tma_transaction_load_c_bytes); - } + copy(tma_C.with(tma_load_c_mbarrier), tCgC, tCsC); + set_barrier_transaction_bytes(tma_load_c_mbarrier, tma_transaction_load_c_bytes); + } warpgroup_sync(kNumMmaWarpGroups); wait_barrier(tma_load_c_mbarrier, /* phase */ 0); From adedb09f081c5d8d548c4887adc6d84eb039f55c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 31 Dec 2025 07:57:38 +0000 Subject: [PATCH 2/2] style: auto-format Python code with ruff --- 14-warp-specialization/warp_specialization_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/14-warp-specialization/warp_specialization_pipeline.py b/14-warp-specialization/warp_specialization_pipeline.py index fd4c824..4887ac4 100644 --- a/14-warp-specialization/warp_specialization_pipeline.py +++ b/14-warp-specialization/warp_specialization_pipeline.py @@ -63,7 +63,7 @@ def relative_error(target: torch.Tensor, ref: torch.Tensor, eps: float = 1e-8): num_failed = 0 -def compare_matrix(kernel_output: torch.Tensor, torch_output: torch.Tensor): +def compare_matrix(kernel_output: torch.Tensor, torch_output: torch.Tensor): kernel_output = kernel_output.float() torch_output = torch_output.float()