Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions cmake/riscv64-spacemit-linux-gnu-gcc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2023 SpacemiT. All rights reserved.
set(CMAKE_SYSTEM_NAME Linux)
SET(CMAKE_SYSTEM_PROCESSOR riscv64)
set(CMAKE_SYSTEM_VERSION 1)

if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
else()
set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
if(DEFINED ENV{RISCV_ROOT_PATH})
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
else()
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
endif()

set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
endif()

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
add_definitions(-D__fp16=_Float16)
87 changes: 87 additions & 0 deletions docs/build-riscv64-spacemit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
> [!IMPORTANT]
> This build documentation is specific only to RISC-V SpacemiT SOCs.

## Build llama.cpp locally (for riscv64)

1. Prepare Toolchain For RISCV
~~~
wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
~~~

2. Build
Below is the build script: it requires utilizing RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization version is `RISCV64_SPACEMIT_IME1`, corresponding to the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Please ensure you have installed the RISC-V compiler and set the environment variable via `export RISCV_ROOT_PATH={your_compiler_path}`.
```bash

cmake -B build-riscv64-spacemit \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_CPU_RISCV64_SPACEMIT=ON \
-DLLAMA_CURL=OFF \
-DGGML_RV_ZFH=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
-DCMAKE_INSTALL_PREFIX=build-riscv64-spacemit/installed

cmake --build build-riscv64-spacemit --parallel $(nproc) --config Release

pushd build-riscv64-spacemit
make install
popd
```

## Simulation
You can use QEMU to perform emulation on non-RISC-V architectures.

1. Download QEMU
~~~
wget https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
~~~

2. Run Simulation
After build your llama.cpp, you can run the executable file via QEMU for simulation, for example:
~~~
export QEMU_ROOT_PATH={your QEMU file path}
export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}

${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build-riscv64-spacemit/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
~~~
## Performance
#### Quantization Support For Matrix
~~~
model name : Spacemit(R) X60
isa : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
mmu : sv39
uarch : spacemit,x60
mvendorid : 0x710
marchid : 0x8000000058000001
~~~

Q4_0
| Model | Size | Params | backend | threads | test | t/s |
| -----------| -------- | ------ | ------- | ------- | ---- |------|
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | pp512|64.12 ± 0.26|
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | tg128|10.03 ± 0.01|
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | pp512|24.16 ± 0.02|
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | tg128|3.83 ± 0.06|
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | pp512|12.08 ± 0.02|
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | tg128|2.23 ± 0.02|

Q4_1
| Model | Size | Params | backend | threads | test | t/s |
| -----------| -------- | ------ | ------- | ------- | ---- |------|
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | pp512|62.07 ± 0.12|
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | tg128|9.91 ± 0.01|
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | pp512|22.95 ± 0.25|
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | tg128|4.01 ± 0.15|
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | pp512|11.55 ± 0.16|
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | tg128|2.25 ± 0.04|


Q4_K
| Model | Size | Params | backend | threads | test | t/s |
| -----------| -------- | ------ | ------- | ------- | ---- |------|
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | pp512|9.29 ± 0.05|
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | tg128|5.67 ± 0.04|
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | pp512|10.38 ± 0.10|
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | tg128|3.17 ± 0.08|
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | pp512|4.23 ± 0.04|
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | tg128|1.73 ± 0.00|
11 changes: 10 additions & 1 deletion ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
ggml-cpu/arch/riscv/quants.c
ggml-cpu/arch/riscv/repack.cpp
)
if (GGML_RVV)
if (GGML_CPU_RISCV64_SPACEMIT)
list(APPEND ARCH_FLAGS -march=rv64gcv_zfh_zba_zicbop -mabi=lp64d -DGGML_RV_ZFH -D${RISCV64_SPACEMIT_IME_SPEC})
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT)
list(APPEND GGML_CPU_SOURCES
ggml-cpu/spacemit/ggml_spacemit_ime.cpp
ggml-cpu/spacemit/ggml_spacemit_ime.h
ggml-cpu/spacemit/ggml_spacemit_ime_kernels.cpp
ggml-cpu/spacemit/ggml_spacemit_ime_kernels.h
)
elseif (GGML_RVV)
if (GGML_XTHEADVECTOR)
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
elseif (GGML_RV_ZFH)
Expand Down
40 changes: 40 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3209,6 +3209,26 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
}
#elif defined(__riscv) && defined(__riscv_v) && defined(__riscv_zfh)
int64_t n_loop = n;
__asm__ volatile(
"LOOP%=: \n\t"
"vsetvli t0, %[n], e32, m4,tu,mu \n\t"
"slli t1, t0, 1 \n\t"
"slli t2, t0, 2 \n\t"
"vle32.v v0, (%[IN]) \n\t"
"add %[IN], %[IN], t2 \n\t"
"vsetvli t0, %[n], e16, m2,tu,mu \n\t"
"vfncvt.f.f.w v4, v0 \n\t"
"vse16.v v4, (%[DST]) \n\t"
"add %[DST], %[DST], t1 \n\t"
"sub %[n], %[n], t0 \n\t"
"bnez %[n], LOOP%= \n\t"

: [ IN ] "+r"(x), [ DST ] "+r"(y), [ n ] "+r"(n_loop)
:
: "cc", "t0", "t1", "t2");
i += n;
#endif
for (; i < n; ++i) {
y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
Expand Down Expand Up @@ -3250,6 +3270,26 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
vec_xst(v_yh, 0, (float *)(y + i));
}
#elif defined(__riscv) && defined(__riscv_v) && defined(__riscv_zfh)
int64_t n_loop = n;
__asm__ volatile(
"LOOP%=: \n\t"
"vsetvli t0, %[n], e16, m2,tu,mu \n\t"
"slli t1, t0, 2 \n\t"
"slli t2, t0, 1 \n\t"
"vle16.v v0, (%[IN]) \n\t"
"add %[IN], %[IN], t2 \n\t"
"vfwcvt.f.f.v v4, v0 \n\t"
"vsetvli t0, %[n], e32, m4,tu,mu \n\t"
"vse32.v v4, (%[DST]) \n\t"
"add %[DST], %[DST], t1 \n\t"
"sub %[n], %[n], t0 \n\t"
"bnez %[n], LOOP%= \n\t"

: [ IN ] "+r"(x), [ DST ] "+r"(y), [ n ] "+r"(n_loop)
:
: "cc", "t0", "t1", "t2");
i += n;
#endif

for (; i < n; ++i) {
Expand Down
10 changes: 10 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
# include "kleidiai/kleidiai.h"
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
# include "spacemit/ggml_spacemit_ime.h"
#endif

#if defined(_WIN32)
# define WIN32_LEAN_AND_MEAN
# ifndef NOMINMAX
Expand Down Expand Up @@ -45,6 +49,12 @@ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_type
}
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
}
#endif

#ifdef GGML_USE_CPU_KLEIDIAI
if (ggml_backend_cpu_kleidiai_buffer_type()) {
bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
Expand Down
Loading