Skip to content

Commit be4fa97

Browse files
committed
ggml: fmt
1 parent da60598 commit be4fa97

File tree

1 file changed

+0
-41
lines changed

1 file changed

+0
-41
lines changed

ggml/src/ggml-cpu/vec.cpp

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -237,48 +237,7 @@ void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t *
237237
vfloat32m1_t acc1 = __riscv_vfadd_vv_f32m1(__riscv_vget_v_f32m2_f32m1(acc0, 0), __riscv_vget_v_f32m2_f32m1(acc0, 1), vl);
238238
vfloat32m1_t redsum = __riscv_vfredusum_vs_f32m1_f32m1(acc1, __riscv_vfmv_v_f_f32m1(0.0f, 1), vl);
239239
sumf += __riscv_vfmv_f_s_f32m1_f32(redsum);
240-
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfbfwma)
241-
size_t vl = __riscv_vsetvlmax_e32m4();
242-
243-
// initialize accumulators to all zeroes
244-
vfloat32m4_t vsum0 = __riscv_vfmv_v_f_f32m4(0.0f, vl);
245-
vfloat32m4_t vsum1 = __riscv_vfmv_v_f_f32m4(0.0f, vl);
246-
247-
// calculate step size
248-
const size_t epr = __riscv_vsetvlmax_e16m2();
249-
const size_t step = epr * 2;
250-
const int np = (n & ~(step - 1));
251240

252-
// unroll by 2
253-
for (; i < np; i += step) {
254-
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16 *)&x[i], epr);
255-
vbfloat16m2_t ay0 = __riscv_vle16_v_bf16m2((const __bf16 *)&y[i], epr);
256-
vsum0 = __riscv_vfwmaccbf16_vv_f32m4(vsum0, ax0, ay0, epr);
257-
__asm__ __volatile__ ("" ::: "memory");
258-
259-
vbfloat16m2_t ax1 = __riscv_vle16_v_bf16m2((const __bf16 *)&x[i + epr], epr);
260-
vbfloat16m2_t ay1 = __riscv_vle16_v_bf16m2((const __bf16 *)&y[i + epr], epr);
261-
vsum1 = __riscv_vfwmaccbf16_vv_f32m4(vsum1, ax1, ay1, epr);
262-
__asm__ __volatile__ ("" ::: "memory");
263-
}
264-
265-
// accumulate in 1 register
266-
vsum0 = __riscv_vfadd_vv_f32m4(vsum0, vsum1, vl);
267-
268-
// leftovers
269-
for (i = np; i < n; i += vl) {
270-
vl = __riscv_vsetvl_e16m2(n - i);
271-
vbfloat16m2_t ax0 = __riscv_vle16_v_bf16m2((const __bf16 *)&x[i], vl);
272-
vbfloat16m2_t ay0 = __riscv_vle16_v_bf16m2((const __bf16 *)&y[i], vl);
273-
vsum0 = __riscv_vfwmaccbf16_vv_f32m4(vsum0, ax0, ay0, vl);
274-
}
275-
276-
// reduce
277-
vl = __riscv_vsetvlmax_e32m2();
278-
vfloat32m2_t acc0 = __riscv_vfadd_vv_f32m2(__riscv_vget_v_f32m4_f32m2(vsum0, 0), __riscv_vget_v_f32m4_f32m2(vsum0, 1), vl);
279-
vfloat32m1_t acc1 = __riscv_vfadd_vv_f32m1(__riscv_vget_v_f32m2_f32m1(acc0, 0), __riscv_vget_v_f32m2_f32m1(acc0, 1), vl);
280-
vfloat32m1_t redsum = __riscv_vfredusum_vs_f32m1_f32m1(acc1, __riscv_vfmv_v_f_f32m1(0.0f, 1), vl);
281-
sumf += __riscv_vfmv_f_s_f32m1_f32(redsum);
282241
#endif
283242
for (; i < n; ++i) {
284243
sumf += (ggml_float)(GGML_BF16_TO_FP32(x[i]) *

0 commit comments

Comments
 (0)