Skip to content

Commit c1f5932

Browse files
committed
Merge branch '10x/rvv-floating-kernels' of github.com:riseproject-dev/llama.cpp into 10x/rvv-floating-kernels
2 parents 7a27886 + c729018 commit c1f5932

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3274,6 +3274,7 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
32743274
__m128 y_vec = _mm_cvtph_ps(x_vec);
32753275
_mm_storeu_ps(y + i, y_vec);
32763276
}
3277+
32773278
#elif defined(__riscv_v_intrinsic) && defined(__riscv_zvfhmin)
32783279
// calculate step size
32793280
const int epr = __riscv_vsetvlmax_e16m2();
@@ -3288,17 +3289,19 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
32883289

32893290
vfloat16m2_t ax1 = __riscv_vle16_v_f16m2((const _Float16*)x + i + epr, epr);
32903291
vfloat32m4_t ay1 = __riscv_vfwcvt_f_f_v_f32m4(ax1, epr);
3292+
32913293
__riscv_vse32_v_f32m4(y + i + epr, ay1, epr);
32923294
}
32933295

3294-
// leftovers
3296+
// handle leftovers
32953297
int vl;
32963298
for (i = np; i < n; i += vl) {
32973299
vl = __riscv_vsetvl_e16m2(n - i);
32983300
vfloat16m2_t ax0 = __riscv_vle16_v_f16m2((const _Float16*)x + i, vl);
32993301
vfloat32m4_t ay0 = __riscv_vfwcvt_f_f_v_f32m4(ax0, vl);
33003302
__riscv_vse32_v_f32m4(y + i, ay0, vl);
33013303
}
3304+
33023305
#endif
33033306

33043307
for (; i < n; ++i) {

0 commit comments

Comments
 (0)