
Commit 120d30f

fix: support attention where the q, k, v shapes differ
1 parent 61789f7 commit 120d30f

4 files changed: 25 additions, 15 deletions

4 files changed

+25
-15
lines changed

operators/src/attention/args.rs

Lines changed: 12 additions & 7 deletions
@@ -30,6 +30,7 @@ pub(super) struct Meta {
     pub seq: MaybeDyn<usize>,
     pub att: MaybeDyn<usize>,
     pub dh: MaybeDyn<usize>,
+    pub dv: MaybeDyn<usize>,
 }
 
 impl<H: Hardware> Args<H> {
@@ -41,17 +42,20 @@ impl<H: Hardware> Args<H> {
         seq: MaybeDyn<usize>,
         att: MaybeDyn<usize>,
         dh: MaybeDyn<usize>,
+        dv: MaybeDyn<usize>,
     ) -> Self {
-        let qo_layout = TensorLayout::new_dyn(dt, &[nh, seq, dh], &[dyn_(); 3]);
-        let kv_layout = TensorLayout::new_dyn(dt, &[nkvh, att, dh], &[dyn_(); 3]);
+        let q_layout = TensorLayout::new_dyn(dt, &[nh, seq, dh], &[dyn_(); 3]);
+        let k_layout = TensorLayout::new_dyn(dt, &[nkvh, seq, dh], &[dyn_(); 3]);
+        let v_layout = TensorLayout::new_dyn(dt, &[nkvh, att, dv], &[dyn_(); 3]);
+        let o_layout = TensorLayout::new_dyn(dt, &[nkvh, att, dh], &[dyn_(); 3]);
         Self {
-            q_layout: qo_layout.clone(),
+            q_layout: q_layout.clone(),
             q_base: null_mut(),
-            k_layout: kv_layout.clone(),
+            k_layout: k_layout.clone(),
             k_base: null(),
-            v_layout: kv_layout,
+            v_layout: v_layout,
             v_base: null(),
-            o_layout: qo_layout,
+            o_layout: o_layout,
             o_base: null_mut(),
             mask,
         }
@@ -85,7 +89,8 @@ impl<H: Hardware> Args<H> {
             nkvh: dim_distinct(&[nkvh_k, nkvh_v])?,
             seq: dim_distinct(&[seq_q, seq_o])?,
             att: dim_distinct(&[att_k, att_v])?,
-            dh: dim_distinct(&[dh_q, dh_k, dh_v, dh_o])?,
+            dh: dim_distinct(&[dh_q, dh_k])?,
+            dv: dim_distinct(&[dh_v, dh_o])?,
         })
     }
 }
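The new `dv` field lets the value/output head dimension differ from the query/key head dimension `dh`. A minimal, dependency-free sketch of the shape rule this encodes (illustrative names only, not the crate's API; GQA grouping via `nkvh` is omitted for brevity):

    // Attention shapes when the q/k head dim (dh) differs from the v/o head dim (dv).
    fn attention_out_shape(nh: usize, seq: usize, att: usize, dh: usize, dv: usize) -> [usize; 3] {
        let q = [nh, seq, dh]; // queries
        let k = [nh, att, dh]; // keys: must share dh with q   (dh = dim_distinct(&[dh_q, dh_k]))
        let v = [nh, att, dv]; // values: head dim may differ  (dv = dim_distinct(&[dh_v, dh_o]))
        assert_eq!(q[2], k[2]);
        let score = [nh, seq, att]; // score = q . k^T
        assert_eq!(score[2], v[1]); // att = dim_distinct(&[att_k, att_v])
        [nh, seq, v[2]]             // out = softmax(score) . v, so the output head dim is dv
    }

    fn main() {
        // e.g. a layout whose value heads are narrower than its query/key heads
        assert_eq!(attention_out_shape(32, 4, 128, 192, 128), [32, 4, 128]);
    }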

operators/src/attention/cuda.rs

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ mod test {
             seq.into(),
             att.into(),
             dyn_(),
+            dyn_(),
         )
     }
 
operators/src/attention/operator.rs

Lines changed: 10 additions & 6 deletions
@@ -1,4 +1,4 @@
-use super::{args::Meta, Args, Attention};
+use super::{args::Meta, Args, Attention};
 use crate::{
     dyn_, fuesd_softmax, get_static, mat_mul, rearrange, ByteOf, Hardware, LaunchError, QueueAlloc,
     SchemeError, TensorLayout, Workspace, WorkspaceCollector,
@@ -53,6 +53,7 @@ where
             seq,
             att,
             dh,
+            dv,
             ..
         } = args.meta()?;
         let Args {
@@ -64,11 +65,12 @@ where
         } = args;
 
         // If nh, seq, att, dh are not guaranteed to be known, initialize the operator with arbitrary values
-        let (Some(&nh), Some(&seq), Some(&att), Some(&dh)) = (
+        let (Some(&nh), Some(&seq), Some(&att), Some(&dh), Some(&dv)) = (
             nh.get_static(),
             seq.get_static(),
             att.get_static(),
             dh.get_static(),
+            dv.get_static(),
         ) else {
             let mut wc = WorkspaceCollector::new();
 
@@ -149,6 +151,7 @@ where
             seq,
             att,
             dh,
+            dv,
         } = args.meta()?;
         let Args {
             mask,
@@ -172,8 +175,8 @@ where
         let ele = dt.nbytes();
         get_static! {
             nh seq dh
-            nh_sq seq_sq dh_sq
-            nkvh att
+            dv seq_sq dh_sq
+            nkvh att nh_sq
             nkvh_sk att_sk dh_sk
         };
 
@@ -219,6 +222,7 @@ where
         let k_layout = TensorLayout::new(dt, k_layout.shape(), k_layout.strides());
         let att_mat_mul = TensorLayout::new_contiguous(dt, &[nkvh, head_group * seq, att]);
         let att_softmax = TensorLayout::new_contiguous(dt, &[nh, seq, att]);
+        let att_result = TensorLayout::new_contiguous(dt, &[nkvh, head_group * seq, dv]);
 
         // att = q . k^T
         self.mat_mul.launch(
@@ -248,7 +252,7 @@ where
         // q = att . v
         self.mat_mul.launch(
             &mat_mul::Args {
-                c_layout: qx_layout.clone(),
+                c_layout: att_result.clone(),
                 c_base: q_base,
                 beta: 0.,
                 a_layout: att_mat_mul,
@@ -266,7 +270,7 @@ where
             &rearrange::Args {
                 dst_layout: o_layout.clone(),
                 dst_base: *o_base,
-                src_layout: q_layout.clone(),
+                src_layout: o_layout.clone(),
                 src_base: q_base,
             },
             workspace,
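A quick sanity check of why the second GEMM gets its own `att_result` layout: the softmaxed score tensor is [nkvh, head_group * seq, att] and v is [nkvh, att, dv], so their product is [nkvh, head_group * seq, dv]; reusing q's layout (trailing dim dh) is only correct when dv == dh. A small self-contained sketch of that shape rule (illustrative, not the crate's mat_mul API):

    // Batched GEMM output shape: a: [b, m, k] . b: [b, k, n] -> c: [b, m, n].
    fn batched_matmul_shape(a: [usize; 3], b: [usize; 3]) -> [usize; 3] {
        assert_eq!(a[0], b[0], "batch dims must match (nkvh)");
        assert_eq!(a[2], b[1], "inner dims must match (att)");
        [a[0], a[1], b[2]]
    }

    fn main() {
        let (nkvh, head_group, seq, att, dv) = (8, 4, 1, 128, 128);
        let score = [nkvh, head_group * seq, att]; // softmax(q . k^T)
        let value = [nkvh, att, dv];
        // matches the new att_result layout, whose trailing dim is dv rather than dh
        assert_eq!(batched_matmul_shape(score, value), [nkvh, head_group * seq, dv]);
    }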

operators/src/attention_kv_cached/operator.rs

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-use super::{args::Meta, Args, AttnKVCached};
+use super::{args::Meta, Args, AttnKVCached};
 use crate::{
     attention, dyn_, get_static, rearrange, shape_mismatch, ByteOf, Hardware, LaunchError,
     MaybeDyn, QueueAlloc, TensorLayout, WorkspaceCollector,
@@ -66,7 +66,7 @@ where
         };
 
         wc.push_sub(self.attention.scheme(
-            &attention::Args::new_null(args.mask, dt, nh, nkvh, seq, att, dh),
+            &attention::Args::new_null(args.mask, dt, nh, nkvh, seq, att, dh, dh),
             max_workspace_size,
         )?);
 
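The KV-cached wrapper keeps its cached K and V at the same head dimension, which is why it forwards `dh` for both of the constructor's trailing head-dim arguments. A caller that did want a narrower value head dim would pass the two separately; a hypothetical one-line sketch mirroring the argument order shown in the args.rs diff (not a call site that exists in this commit):

    // Hypothetical: distinct q/k head dim (dh) and v/o head dim (dv).
    let attn_args = attention::Args::new_null(mask, dt, nh, nkvh, seq, att, dh, dv);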