Skip to content

Commit 8712870

Browse files
committed
perf(cpu): softmax-cpu 并行化以加速大上下文的 attention 计算
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent 0bd4107 commit 8712870

File tree

1 file changed

+11
-10
lines changed
  • operators/src/fuesd_softmax/common_cpu

1 file changed

+11
-10
lines changed

operators/src/fuesd_softmax/common_cpu/mod.rs

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
};
55
use crate::{common_cpu::Cpu, get_static, ByteOf, LaunchError, QueueAlloc, SchemeError};
66
use half::f16;
7+
use rayon::iter::{IntoParallelIterator, ParallelIterator};
78

89
pub struct Operator;
910

@@ -100,16 +101,16 @@ impl<T> Scheme<T> {
100101
let seq_len = self.seq_len as isize;
101102
let att_len = self.att_len as isize;
102103

103-
for j in 0..seq_len {
104-
(0..nh).for_each(|i| {
105-
let att = unsafe { self.att_base.byte_offset(i * self.sh + j * self.ss) };
106-
let causal = match mask {
107-
AttnMask::None => att_len,
108-
AttnMask::Causal => att_len - seq_len + j + 1,
109-
};
110-
f(causal, att);
111-
})
112-
}
104+
(0..nh * seq_len).into_par_iter().for_each(|i| {
105+
let j = i / seq_len;
106+
let k = i % seq_len;
107+
let att = unsafe { self.att_base.byte_offset(j * self.sh + k * self.ss) };
108+
let causal = match mask {
109+
AttnMask::None => att_len,
110+
AttnMask::Causal => att_len - seq_len + k + 1,
111+
};
112+
f(causal, att)
113+
});
113114
}
114115
}
115116

0 commit comments

Comments
 (0)