From 60e169e950396c309e838d19b76fcbfe4d8a7253 Mon Sep 17 00:00:00 2001
From: WuMinlong <726485702@qq.com>
Date: Tue, 27 Jan 2026 10:08:29 +0800
Subject: [PATCH 1/2] docs: trans rkllm smolvlm2 zh docs
---
.../app-development/ai/rkllm-smolvlm2.md | 10 +
.../rock5b/app-development/rkllm-smolvlm2.md | 10 -
.../current/common/ai/_rkllm_smolvlm2.mdx | 377 ++++++++++++++++++
.../app-development/ai/rkllm-smolvlm2.md | 10 +
4 files changed, 397 insertions(+), 10 deletions(-)
create mode 100644 docs/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
delete mode 100644 docs/rock5/rock5b/app-development/rkllm-smolvlm2.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rkllm_smolvlm2.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
diff --git a/docs/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md b/docs/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
new file mode 100644
index 000000000..941632e51
--- /dev/null
+++ b/docs/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 27
+description: Use RKLLM SmolVLM2 on ROCK 5B
+---
+
+# RKLLM SmolVLM2
+
+import SMOLVLM2 from "../../../../common/ai/\_rkllm_smolvlm2.mdx";
+
+<SMOLVLM2 />
diff --git a/docs/rock5/rock5b/app-development/rkllm-smolvlm2.md b/docs/rock5/rock5b/app-development/rkllm-smolvlm2.md
deleted file mode 100644
index 2de6ce062..000000000
--- a/docs/rock5/rock5b/app-development/rkllm-smolvlm2.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 27
-description: 使用RKNN转换Stable Diffusion模型
----
-
-# RKLLM SmolVLM2
-
-import SMOLVLM2 from "../../../common/ai/\_rkllm_smolvlm2.mdx";
-
-<SMOLVLM2 />
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rkllm_smolvlm2.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rkllm_smolvlm2.mdx
new file mode 100644
index 000000000..a5cd2a79b
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rkllm_smolvlm2.mdx
@@ -0,0 +1,377 @@
+[SmolVLM2](https://huggingface.co/blog/smolvlm2) is a family of compact yet powerful vision-language models developed by Hugging Face, designed to bring advanced vision-language capabilities to resource-constrained devices such as smartphones and embedded systems.
+Their small footprint makes them well suited to such devices, bridging the gap between large models and limited hardware resources.
+This document describes how to use RKLLM to deploy SmolVLM2 [256M](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct) / [500M](https://huggingface.co/HuggingFaceTB/SmolVLM2-500M-Video-Instruct) / [2.2B](https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct) on the RK3588 and run inference accelerated by the NPU.
+
+:::tip
+**Original Contribution**
+
+This model was provided by the Radxa community user @[**Rients Politiek**](https://forum.radxa.com/u/rients_politiek/summary).
+
+Original Radxa community forum post: [**SmolVLM2 for RK3588 NPU**](https://forum.radxa.com/t/smolvlm2-for-rk3588-npu/30077)
+:::
+
+## Model Deployment
+
+SmolVLM2 is available in three sizes. Set the environment variables below according to the size you want to deploy; the later commands use them to select the matching repository and model files.
+
+### Parameter Selection
+
+
+
+
+
+
+
+ ```bash
+ export MODEL_SIZE=256m REPO_SIZE=256M
+ ```
+
+
+
+
+
+
+
+
+
+ ```bash
+ export MODEL_SIZE=500m REPO_SIZE=500M
+ ```
+
+
+
+
+
+
+
+
+
+ ```bash
+ export MODEL_SIZE=2.2b REPO_SIZE=2B
+ ```
+
+
+
+
+
+
+
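+Both variables are used by the commands in the following sections. As a quick sanity check (not part of the original guide), confirm they are set in the current shell:
+
+```bash
+echo "MODEL_SIZE=${MODEL_SIZE} REPO_SIZE=${REPO_SIZE}"
+```
+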
+### Download the Code
+
+
+
+```bash
+git clone https://github.com/Qengineering/SmolVLM2-${REPO_SIZE}-NPU.git && cd SmolVLM2-${REPO_SIZE}-NPU
+```
+
+
+
+### Build the Project
+
+#### Install Dependencies
+
+
+
+```bash
+sudo apt update
+sudo apt install cmake gcc g++ make libopencv-dev
+```
+
+
+
+#### Build with CMake
+
+
+
+```bash
+cmake -B build -DRK_LIB_PATH=${PWD}/aarch64/library -DCMAKE_CXX_FLAGS="-I${PWD}/aarch64/include"
+cmake --build build -j4
+```
+
+
+
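+If the build succeeds, the example binary invoked as `VLM_NPU` in the run commands below should have been produced. A quick way to locate it (a sketch; the exact output path depends on the project's CMakeLists):
+
+```bash
+# Look for the freshly built example binary
+find . -maxdepth 2 -name VLM_NPU -type f
+```
+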
+### Download the Model
+
+#### Install hf-cli
+
+
+
+```bash
+curl -LsSf https://hf.co/cli/install.sh | bash
+```
+
+
+
+#### Download the Model
+
+
+
+```bash
+hf download Qengineering/SmolVLM2-${MODEL_SIZE}-rk3588 --local-dir ./SmolVLM2-${MODEL_SIZE}-rk3588
+```
+
+
+
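+Once the download finishes, the directory should contain the vision encoder (`.rknn`) and the language model (`.rkllm`) referenced by the run commands below. A quick check (file names differ slightly between the three sizes):
+
+```bash
+ls -lh ./SmolVLM2-${MODEL_SIZE}-rk3588/
+```
+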
+### Run the Examples
+
+
+
+
+
+
+
+ ```bash
+ export RKLLM_LOG_LEVEL=1
+ # VLM_NPU Picture RKNN_model RKLLM_model NewTokens ContextLength
+ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}-instruct_w8a8_rk3588.rkllm 2048 4096
+ ```
+
+
+
+
+
+
+
+
+
+ ```bash
+ export RKLLM_LOG_LEVEL=1
+ # VLM_NPU Picture RKNN_model RKLLM_model NewTokens ContextLength
+ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_llm_w8a8_rk3588.rkllm 2048 4096
+ ```
+
+
+
+
+
+
+
+
+
+ ```bash
+ export RKLLM_LOG_LEVEL=1
+ # VLM_NPU Picture RKNN_model RKLLM_model NewTokens ContextLength
+ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}-instruct_w8a8_rk3588.rkllm 2048 4096
+ ```
+
+
+
+
+
+
+
+
+

+ input image
+
+
+```bash
+prompt: Describe the image.
+```
+
+
+
+
+
+ ```bash
+ rock@rock-5b-plus:~/SmolVLM2-256M-NPU$ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}-instruct_w8a8_rk3588.rkllm 2048 4096
+ I rkllm: rkllm-runtime version: 1.2.3, rknpu driver version: 0.9.8, platform: RK3588
+ I rkllm: loading rkllm model from ./SmolVLM2-256m-rk3588/smolvlm2-256m-instruct_w8a8_rk3588.rkllm
+ I rkllm: rkllm-toolkit version: 1.2.2, max_context_limit: 4096, npu_core_num: 3, target_platform: RK3588, model_dtype: W8A8
+ I rkllm: Enabled cpus: [4, 5, 6, 7]
+ I rkllm: Enabled cpus num: 4
+ rkllm init success
+ I rkllm: reset chat template:
+ I rkllm: system_prompt: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
+ I rkllm: prompt_prefix: <|im_start|>user\n
+ I rkllm: prompt_postfix: <|im_end|>\n<|im_start|>assistant\n
+ W rkllm: Calling rkllm_set_chat_template will disable the internal automatic chat template parsing, including enable_thinking. Make sure your custom prompt is complete and valid.
+
+ used NPU cores 3
+
+ model input num: 1, output num: 1
+
+ Input tensors:
+ index=0, name=pixel_values, n_dims=4, dims=[1, 384, 384, 3], n_elems=442368, size=884736, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Output tensors:
+ index=0, name=output, n_dims=3, dims=[1, 36, 576, 0], n_elems=20736, size=41472, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Model input height=384, width=384, channel=3
+
+
+ User: Describe the image.
+ Answer: The image depicts a scene from space, specifically looking at the moon's surface. The moon is in the process of being tidied up and has been cleaned to remove any debris or stains that might have accumulated over time. The overall atmosphere appears to be clear and bright, with no visible signs of pollution or other human activity.
+
+ The image also includes a large number of small objects scattered across the surface of the moon, which appear to be rocks or boulders. These objects are scattered randomly around the moon's surface, creating a sense of randomness and disorder. The overall atmosphere is calm and serene, with no signs of any movement or activity in the scene.
+
+ Overall, this image gives a sense of the beauty and cleanliness of the lunar environment, as well as the ongoing process of tidying up the moon's surface.
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Model init time (ms) 227.84
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Stage Total Time (ms) Tokens Time per Token (ms) Tokens per Second
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Prefill 97.59 78 1.25 799.24
+ I rkllm: Generate 2643.09 166 15.92 62.81
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Peak Memory Usage (GB)
+ I rkllm: 0.59
+ I rkllm: --------------------------------------------------------------------------------------
+ ```
+
+
+
+
+
+ ```bash
+ rock@rock-5b-plus:~/SmolVLM2-500M-NPU$ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2_${MODEL_SIZE}_llm_w8a8_rk3588.rkllm 2048 4096
+ I rkllm: rkllm-runtime version: 1.2.3, rknpu driver version: 0.9.8, platform: RK3588
+ I rkllm: loading rkllm model from ./SmolVLM2-500m-rk3588/smolvlm2_500m_llm_w8a8_rk3588.rkllm
+ I rkllm: rkllm-toolkit version: 1.2.2, max_context_limit: 4096, npu_core_num: 3, target_platform: RK3588, model_dtype: W8A8
+ I rkllm: Enabled cpus: [4, 5, 6, 7]
+ I rkllm: Enabled cpus num: 4
+ rkllm init success
+ I rkllm: reset chat template:
+ I rkllm: system_prompt: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
+ I rkllm: prompt_prefix: <|im_start|>user\n
+ I rkllm: prompt_postfix: <|im_end|>\n<|im_start|>assistant\n
+ W rkllm: Calling rkllm_set_chat_template will disable the internal automatic chat template parsing, including enable_thinking. Make sure your custom prompt is complete and valid.
+
+ used NPU cores 3
+
+ model input num: 1, output num: 1
+
+ Input tensors:
+ index=0, name=pixel_values, n_dims=4, dims=[1, 384, 384, 3], n_elems=442368, size=884736, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Output tensors:
+ index=0, name=output, n_dims=3, dims=[1, 36, 960, 0], n_elems=34560, size=69120, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Model input height=384, width=384, channel=3
+
+
+ User: Describe the image.
+ Answer: The image is a surreal and fantastical representation of a space station orbiting a planet, set against a backdrop of stars and nebulae. The station, which resembles a large, spherical structure with multiple levels and windows, is depicted as being constructed from metallic materials that reflect the light of the distant stars. The station's interior is filled with various objects and structures, including what appears to be a control room or laboratory area, complete with computers, monitors, and other equipment.
+
+ The planet itself is depicted as having a surface covered in a thick layer of ice or snow, which gives it a cold and desolate appearance. The sky above the station is filled with stars, creating a sense of vastness and isolation. The overall atmosphere of the image suggests that the space station is located in a region of space where there are no other planets or celestial bodies visible in the background.
+
+ The colors in the image are predominantly dark and muted, with the exception of the bright lights and reflective surfaces of the station's interior. This contrast creates a sense of depth and distance, drawing the viewer's eye towards the central structure of the space station. The image also features a series of small, glowing orbs scattered throughout the scene, which add to the surreal and dreamlike quality of the image.
+
+ Overall, the image is a striking representation of a space station orbiting a planet in a region of space where there are no other celestial bodies visible in the background. It evokes a sense of wonder and curiosity about the possibilities of life beyond our own planet.
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Model init time (ms) 512.04
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Stage Total Time (ms) Tokens Time per Token (ms) Tokens per Second
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Prefill 150.43 78 1.93 518.52
+ I rkllm: Generate 7967.56 311 25.62 39.03
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Peak Memory Usage (GB)
+ I rkllm: 0.88
+ I rkllm: --------------------------------------------------------------------------------------
+ ```
+
+
+
+
+
+
+ ```bash
+ rock@rock-5b-plus:~/SmolVLM2-2B-NPU$ ./VLM_NPU ./Moon.jpg ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}_vision_fp16_rk3588.rknn ./SmolVLM2-${MODEL_SIZE}-rk3588/smolvlm2-${MODEL_SIZE}-instruct_w8a8_rk3588.rkllm 2048 4096
+ I rkllm: rkllm-runtime version: 1.2.3, rknpu driver version: 0.9.8, platform: RK3588
+ I rkllm: loading rkllm model from ./SmolVLM2-2.2b-rk3588/smolvlm2-2.2b-instruct_w8a8_rk3588.rkllm
+ I rkllm: rkllm-toolkit version: 1.2.2, max_context_limit: 4096, npu_core_num: 3, target_platform: RK3588, model_dtype: W8A8
+ I rkllm: Enabled cpus: [4, 5, 6, 7]
+ I rkllm: Enabled cpus num: 4
+ rkllm init success
+ I rkllm: reset chat template:
+ I rkllm: system_prompt: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
+ I rkllm: prompt_prefix: <|im_start|>user\n
+ I rkllm: prompt_postfix: <|im_end|>\n<|im_start|>assistant\n
+ W rkllm: Calling rkllm_set_chat_template will disable the internal automatic chat template parsing, including enable_thinking. Make sure your custom prompt is complete and valid.
+
+ used NPU cores 3
+
+ model input num: 1, output num: 1
+
+ Input tensors:
+ index=0, name=pixel_values, n_dims=4, dims=[1, 384, 384, 3], n_elems=442368, size=884736, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Output tensors:
+ index=0, name=output, n_dims=3, dims=[1, 81, 2048, 0], n_elems=165888, size=331776, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+
+ Model input height=384, width=384, channel=3
+
+
+ User: Describe the image.
+ Answer: In this captivating image, an astronaut is comfortably seated on the surface of the moon, which is bathed in the soft glow of a distant star. The lunar landscape stretches out around him, punctuated by craters and mountains that add texture to the otherwise barren terrain.
+
+ The astronaut himself is clad in a pristine white spacesuit, its reflective visor gleaming under the celestial light. His helmet is adorned with a gold visor, adding an air of sophistication to his appearance. A green bottle rests casually on his lap, suggesting a moment of relaxation amidst the vastness of space.
+
+ In the background, Earth hangs in the sky, its blue and white hues contrasting sharply with the moon's gray surface. The planet is dotted with clouds, hinting at the diversity of life that exists within its atmosphere.
+
+ The image as a whole paints a picture of exploration and discovery, capturing not just the physical environment but also the emotional journey of an astronaut venturing into the unknown. It's a testament to human ingenuity and our innate desire to explore the cosmos.
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Model init time (ms) 2096.35
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Stage Total Time (ms) Tokens Time per Token (ms) Tokens per Second
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Prefill 608.84 123 4.95 202.02
+ I rkllm: Generate 15548.70 214 72.66 13.76
+ I rkllm: --------------------------------------------------------------------------------------
+ I rkllm: Peak Memory Usage (GB)
+ I rkllm: 3.39
+ I rkllm: --------------------------------------------------------------------------------------
+ ```
+
+
+
+
+
+## Performance Analysis
+
+
+
+
+
+On ROCK 5B+, the 256M model generates at 62.81 tokens/s.
+
+| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
+| -------- | --------------- | ------ | ------------------- | ----------------- |
+| Prefill | 97.59 | 78 | 1.25 | 799.24 |
+| Generate | 2643.09 | 166 | 15.92 | 62.81 |
+
+
+
+
+
+On ROCK 5B+, the 500M model generates at 39.03 tokens/s.
+
+| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
+| -------- | --------------- | ------ | ------------------- | ----------------- |
+| Prefill | 150.43 | 78 | 1.93 | 518.52 |
+| Generate | 7967.56 | 311 | 25.62 | 39.03 |
+
+
+
+
+
+On ROCK 5B+, the 2.2B model generates at 13.76 tokens/s.
+
+| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
+| -------- | --------------- | ------ | ------------------- | ----------------- |
+| Prefill | 608.84 | 123 | 4.95 | 202.02 |
+| Generate | 15548.70 | 214 | 72.66 | 13.76 |
+
+
+
+
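+The Tokens per Second column is simply the reciprocal of Time per Token. A quick check for the 256M generate stage (not part of the original guide):
+
+```bash
+# 1000 ms / 15.92 ms per token ≈ 62.8 tokens/s
+awk 'BEGIN { printf "%.2f tokens/s\n", 1000 / 15.92 }'
+```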
+
+## Memory Usage
+
+| Model Size             | 256M | 500M | 2.2B |
+| ---------------------- | ---- | ---- | ---- |
+| Peak Memory Usage (GB) | 0.59 | 0.88 | 3.39 |
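+
+To observe memory usage yourself while a model is running, standard Linux tools are sufficient. The NPU load node below is exposed by the rknpu driver on RK3588 vendor kernels (a sketch; the exact path may vary between kernels):
+
+```bash
+# Overall system memory while the model is loaded
+free -h
+# NPU utilisation reported by the rknpu driver (requires root)
+sudo cat /sys/kernel/debug/rknpu/load
+```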
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
new file mode 100644
index 000000000..cf8b49c0f
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-smolvlm2.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 27
+description: Use RKLLM SmolVLM2 on ROCK 5B
+---
+
+# RKLLM SmolVLM2
+
+import SMOLVLM2 from "../../../../common/ai/\_rkllm_smolvlm2.mdx";
+
+<SMOLVLM2 />
From 7d5be9d7fae2e87aceaa55cd04d251f947e60382 Mon Sep 17 00:00:00 2001
From: WuMinlong <726485702@qq.com>
Date: Tue, 27 Jan 2026 11:44:21 +0800
Subject: [PATCH 2/2] docs: trans rock 5b and orion ai dev zh docs
---
.../common/ai/_ernie-4_5-0_3b_llama_cpp.mdx | 48 +-
.../ai/_ernie-4_5-21b-a3b_llama_cpp.mdx | 48 +-
.../current/common/ai/_rknn_custom_yolo.mdx | 443 +++---------------
.../common/ai/_stable_diffusion_convert.mdx | 66 +--
.../current/common/ai/rockchip/_clip.mdx | 194 ++++++++
.../current/common/ai/rockchip/_deeplabv3.mdx | 216 +++++++++
.../current/common/ai/rockchip/_lprnet.mdx | 173 +++++++
.../current/common/ai/rockchip/_mobilenet.mdx | 161 +++++++
.../current/common/ai/rockchip/_mobilesam.mdx | 201 ++++++++
.../current/common/ai/rockchip/_ppocr.mdx | 248 ++++++++++
.../current/common/ai/rockchip/_ppyoloe.mdx | 209 +++++++++
.../current/common/ai/rockchip/_resnet.mdx | 160 +++++++
.../common/ai/rockchip/_retinaface.mdx | 207 ++++++++
.../common/ai/rockchip/_rknn_model_zoo.mdx | 138 ++++++
.../current/common/ai/rockchip/_wav2vec2.mdx | 172 +++++++
.../current/common/ai/rockchip/_whisper.mdx | 262 +++++++++++
.../common/ai/rockchip/_yolo_world.mdx | 288 ++++++++++++
.../current/common/ai/rockchip/_yolov8.mdx | 198 ++++++++
.../common/ai/rockchip/_yolov8_seg.mdx | 209 +++++++++
.../current/common/dev/_rkllm-deepseek-r1.mdx | 243 +++++-----
.../current/common/dev/_rkllm-install.mdx | 171 +++----
.../current/common/dev/_rkllm-usage.mdx | 166 ++-----
.../current/common/dev/_rkllm_qwen2_vl.mdx | 324 ++++++-------
.../artificial-intelligence/_llava-1-6-7b.mdx | 318 +++++++++++++
.../_minicpm-o-2-6.mdx | 306 ------------
.../_minicpm-v-2-6.mdx | 311 ++++++++++++
.../_mobilenet-v2-int8.mdx | 128 +++++
.../_qwen2-5-vl-3b.mdx | 31 +-
.../artificial-intelligence/_qwen2vl-2b.mdx | 30 +-
.../Multimodality/README.md | 2 +-
.../Multimodality/llava-1-6-7b.md | 9 +
.../Multimodality/minicpm-v-2-6.md | 9 +
.../Multimodality/qwen2-5-vl-3b.md | 2 +-
.../Multimodality/qwen2vl-2b.md | 2 +-
.../Vision/mobilenet-v2-int8.md | 9 +
.../Multimodality/README.md | 2 +-
.../Multimodality/llava-1-6-7b.md | 9 +
.../Multimodality/minicpm-v-2-6.md | 9 +
.../Multimodality/qwen2-5-vl-3b.md | 2 +-
.../Multimodality/qwen2vl-2b.md | 2 +-
.../Vision/mobilenet-v2-int8.md | 9 +
.../rock5/rock5b/app-development/README.md | 4 +-
.../rock5/rock5b/app-development/ai/README.md | 9 +
.../rock5/rock5b/app-development/ai/clip.md | 9 +
.../rock5b/app-development/ai/deeplabv3.md | 9 +
.../rock5/rock5b/app-development/ai/lprnet.md | 9 +
.../rock5b/app-development/ai/mobilenet.md | 9 +
.../rock5b/app-development/ai/mobilesam.md | 9 +
.../rock5/rock5b/app-development/ai/ppocr.md | 9 +
.../rock5b/app-development/ai/ppyoloe.md | 9 +
.../rock5/rock5b/app-development/ai/resnet.md | 9 +
.../rock5b/app-development/ai/retinaface.md | 9 +
.../app-development/ai/rkllm-deepseek-r1.md | 9 +
.../app-development/ai/rkllm-install.md | 9 +
.../app-development/ai/rkllm-qwen2-vl.md | 10 +
.../rock5b/app-development/ai/rkllm-usage.md | 9 +
.../app-development/ai/rknn-custom-yolo.md | 10 +
.../rock5b/app-development/ai/rknn-install.md | 10 +
.../app-development/ai/rknn-model-zoo.md | 9 +
.../app-development/ai/rknn-quick-start.md | 9 +
.../ai/rknn-stable-diffusion.md | 10 +
.../ai/rknn-toolkit-lite2-yolov5.md | 9 +
.../ai/rknn-toolkit-lite2-yolov8.md | 10 +
.../app-development/ai/rknn-toolkit2-pc.md | 10 +
.../app-development/ai/rknn-ultralytics.md | 10 +
.../rock5b/app-development/ai/wav2vec2.md | 9 +
.../rock5b/app-development/ai/whisper.md | 9 +
.../rock5b/app-development/ai/yolo-world.md | 9 +
.../rock5b/app-development/ai/yolov8-seg.md | 9 +
.../rock5/rock5b/app-development/ai/yolov8.md | 9 +
.../rock5/rock5b/app-development/gpiod.md | 4 +-
.../rock5/rock5b/app-development/hdmi-rx.md | 4 +-
.../rock5/rock5b/app-development/moonlight.md | 2 +-
.../rock5/rock5b/app-development/mraa.md | 6 +-
.../rock5/rock5b/app-development/opencv.md | 4 +-
.../app-development/qt-cross-compile.md | 10 +
.../app-development/qt_cross_compile.md | 10 -
.../rock5/rock5b/app-development/redroid.md | 6 +-
.../app-development/rkllm_deepseek_r1.md | 9 -
.../rock5b/app-development/rkllm_install.md | 10 -
.../rock5b/app-development/rkllm_qwen2_vl.md | 10 -
.../rock5b/app-development/rkllm_usage.md | 10 -
.../app-development/rknn_custom_yolo.md | 10 -
.../rock5b/app-development/rknn_install.md | 10 -
.../app-development/rknn_quick_start.md | 9 -
.../app-development/rknn_toolkit2_pc.md | 10 -
.../rknn_toolkit_lite2_yolov5.md | 9 -
.../rknn_toolkit_lite2_yolov8.md | 10 -
.../app-development/rknn_ultralytics.md | 10 -
.../{ros2_humble.md => ros2-humble.md} | 4 +-
.../rock5/rock5b/app-development/rtsp.md | 4 +-
.../{venv_usage.md => venv-usage.md} | 4 +-
.../app-development/vscode-remote-ssh.md | 4 +-
93 files changed, 4729 insertions(+), 1438 deletions(-)
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_clip.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_deeplabv3.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_lprnet.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilenet.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilesam.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppocr.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppyoloe.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_resnet.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_retinaface.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_rknn_model_zoo.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_wav2vec2.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_whisper.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolo_world.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8_seg.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_llava-1-6-7b.mdx
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-o-2-6.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-v-2-6.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_mobilenet-v2-int8.mdx
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/README.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/clip.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/deeplabv3.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/lprnet.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilenet.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilesam.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppocr.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppyoloe.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/resnet.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/retinaface.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-deepseek-r1.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-install.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-qwen2-vl.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-usage.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-custom-yolo.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-install.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-model-zoo.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-quick-start.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-stable-diffusion.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov5.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov8.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit2-pc.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-ultralytics.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/wav2vec2.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/whisper.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolo-world.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8-seg.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8.md
create mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt-cross-compile.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt_cross_compile.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_deepseek_r1.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_install.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_qwen2_vl.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_usage.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_custom_yolo.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_install.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_quick_start.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit2_pc.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov5.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov8.md
delete mode 100644 i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_ultralytics.md
rename i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/{ros2_humble.md => ros2-humble.md} (67%)
rename i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/{venv_usage.md => venv-usage.md} (60%)
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-0_3b_llama_cpp.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-0_3b_llama_cpp.mdx
index 8256c7ece..1449c2118 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-0_3b_llama_cpp.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-0_3b_llama_cpp.mdx
@@ -1,14 +1,18 @@
-This document describes how to use llama.cpp with [KleidiAI](https://www.arm.com/markets/artificial-intelligence/software/kleidi) on Radxa Orion O6 / O6N to accelerate inference for Baidu ERNIE models: [ERNIE-4.5-0.3B](https://huggingface.co/baidu/ERNIE-4.5-0.3B-PT) and [ERNIE-4.5-0.3B-Base](https://huggingface.co/baidu/ERNIE-4.5-0.3B-Base-PT).
+This document explains how to run Baidu ERNIE models on the Radxa Orion O6 / O6N using `llama.cpp` with [KleidiAI](https://www.arm.com/markets/artificial-intelligence/software/kleidi) acceleration:
+[ERNIE-4.5-0.3B](https://huggingface.co/baidu/ERNIE-4.5-0.3B-PT) and
+[ERNIE-4.5-0.3B-Base](https://huggingface.co/baidu/ERNIE-4.5-0.3B-Base-PT).
Model links:
- [ERNIE-4.5-0.3B-PT](https://huggingface.co/baidu/ERNIE-4.5-0.3B-PT)
- [ERNIE-4.5-0.3B-Base-PT](https://huggingface.co/baidu/ERNIE-4.5-0.3B-Base-PT)
-## Download models
+## Download the model
-Radxa provides prebuilt GGUF files: [ERNIE-4.5-0.3B-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-0.3B-PT-Q4_0.gguf?status=2)
-and [ERNIE-4.5-0.3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-0.3B-Base-PT-Q4_0.gguf?status=2). You can download them using `modelscope`.
+Radxa provides pre-built GGUF files:
+[ERNIE-4.5-0.3B-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-0.3B-PT-Q4_0.gguf?status=2) and
+[ERNIE-4.5-0.3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-0.3B-Base-PT-Q4_0.gguf?status=2).
+You can download them with `modelscope`:
@@ -39,23 +43,23 @@ and [ERNIE-4.5-0.3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-
-## Model conversion
+## Convert the model (optional)
:::tip
-If you are interested in converting models to GGUF, follow this section to perform the conversion on an x86 host.
+If you want to convert the model to GGUF yourself, follow this section on an x86 host.
-If you do not want to convert models yourself, download the GGUF models provided by Radxa and skip to [**Model inference**](#model-inference).
+Otherwise, download the pre-built GGUF from Radxa and skip to [**Inference**](#inference).
:::
### Build llama.cpp
-Build llama.cpp on an x86 host.
+Build `llama.cpp` on an x86 host.
:::tip
-Follow [**llama.cpp**](../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) to build llama.cpp on an x86 host.
+Follow [**llama.cpp**](../llama-cpp) to build `llama.cpp` on an x86 host.
:::
-Build commands:
+Build command:
@@ -68,9 +72,9 @@ cmake --build build --config Release
-### Download the model
+### Download the source model
-Use `modelscope` to download the source model.
+Use `modelscope` to download the original model:
@@ -100,7 +104,7 @@ Use `modelscope` to download the source model.
-### Convert to a floating-point GGUF model
+### Convert to a float (F16) GGUF
@@ -130,11 +134,11 @@ Use `modelscope` to download the source model.
-Running `convert_hf_to_gguf.py` will generate an F16 floating-point GGUF model in the source model directory.
+Running `convert_hf_to_gguf.py` generates an F16 (float) GGUF file in the model directory.
-### Quantize the GGUF model
+### Quantize the GGUF
-Use `llama-quantize` to quantize the floating-point GGUF model to Q4_0.
+Use `llama-quantize` to quantize the float GGUF to Q4_0:
@@ -164,17 +168,17 @@ Use `llama-quantize` to quantize the floating-point GGUF model to Q4_0.
-Running `llama-quantize` will generate a GGUF model with the specified quantization in the target directory.
+Running `llama-quantize` generates a GGUF file with the selected quantization format in the target path.
-## Model inference
+## Inference
### Build llama.cpp
:::tip
-Follow [**llama.cpp**](../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) to build llama.cpp with **KleidiAI** enabled on Radxa Orion O6 / O6N.
+Follow [**llama.cpp**](../llama-cpp) to build `llama.cpp` with **KleidiAI** enabled on the Radxa Orion O6 / O6N.
:::
-Build commands:
+Build command:
@@ -189,7 +193,7 @@ cmake --build build --config Release
### Run inference
-Use `llama-cli` to chat with the model.
+Use `llama-cli` to chat with the model:
@@ -283,7 +287,7 @@ Use `llama-cli` to chat with the model.
-## Performance analysis
+## Performance benchmarking
You can use `llama-bench` to benchmark the model.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-21b-a3b_llama_cpp.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-21b-a3b_llama_cpp.mdx
index c53a6a930..7ba145cbe 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-21b-a3b_llama_cpp.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_ernie-4_5-21b-a3b_llama_cpp.mdx
@@ -1,14 +1,18 @@
-This document describes how to use llama.cpp with [KleidiAI](https://www.arm.com/markets/artificial-intelligence/software/kleidi) on Radxa Orion O6 / O6N to accelerate inference for Baidu ERNIE models: [ERNIE-4.5-21B-A3B](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-PT) and [ERNIE-4.5-21B-A3B-Base](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Base-PT).
+This document explains how to run Baidu ERNIE models on the Radxa Orion O6 / O6N using `llama.cpp` with [KleidiAI](https://www.arm.com/markets/artificial-intelligence/software/kleidi) acceleration:
+[ERNIE-4.5-21B-A3B](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-PT) and
+[ERNIE-4.5-21B-A3B-Base](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Base-PT).
Model links:
- [ERNIE-4.5-21B-A3B-PT](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-PT)
- [ERNIE-4.5-21B-A3B-Base-PT](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Base-PT)
-## Download models
+## Download the model
-Radxa provides prebuilt GGUF files: [ERNIE-4.5-21B-A3B-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-21B-A3B-PT-Q4_0.gguf?status=2)
-and [ERNIE-4.5-21B-A3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-21B-A3B-Base-PT-Q4_0.gguf?status=2). You can download them using `modelscope`.
+Radxa provides pre-built GGUF files:
+[ERNIE-4.5-21B-A3B-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-21B-A3B-PT-Q4_0.gguf?status=2) and
+[ERNIE-4.5-21B-A3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERNIE-4.5-GGUF/file/view/master/ERNIE-4.5-21B-A3B-Base-PT-Q4_0.gguf?status=2).
+You can download them with `modelscope`:
@@ -39,23 +43,23 @@ and [ERNIE-4.5-21B-A3B-Base-PT-Q4_0.gguf](https://modelscope.cn/models/radxa/ERN
-## Model conversion
+## Convert the model (optional)
:::tip
-If you are interested in converting models to GGUF, follow this section to perform the conversion on an x86 host.
+If you want to convert the model to GGUF yourself, follow this section on an x86 host.
-If you do not want to convert models yourself, download the GGUF models provided by Radxa and skip to [**Model inference**](#model-inference).
+Otherwise, download the pre-built GGUF from Radxa and skip to [**Inference**](#inference).
:::
### Build llama.cpp
-Build llama.cpp on an x86 host.
+Build `llama.cpp` on an x86 host.
:::tip
-Follow [**llama.cpp**](../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) to build llama.cpp on an x86 host.
+Follow [**llama.cpp**](../llama-cpp) to build `llama.cpp` on an x86 host.
:::
-Build commands:
+Build command:
@@ -68,9 +72,9 @@ cmake --build build --config Release
-### Download the model
+### Download the source model
-Use `modelscope` to download the source model.
+Use `modelscope` to download the original model:
@@ -101,7 +105,7 @@ Use `modelscope` to download the source model.
-### Convert to a floating-point GGUF model
+### Convert to a float (F16) GGUF
@@ -132,11 +136,11 @@ Use `modelscope` to download the source model.
-Running `convert_hf_to_gguf.py` will generate an F16 floating-point GGUF model in the source model directory.
+Running `convert_hf_to_gguf.py` generates an F16 (float) GGUF file in the model directory.
-### Quantize the GGUF model
+### Quantize the GGUF
-Use `llama-quantize` to quantize the floating-point GGUF model to Q4_0.
+Use `llama-quantize` to quantize the float GGUF to Q4_0:
@@ -166,17 +170,17 @@ Use `llama-quantize` to quantize the floating-point GGUF model to Q4_0.
-Running `llama-quantize` will generate a GGUF model with the specified quantization in the target directory.
+Running `llama-quantize` generates a GGUF file with the selected quantization format in the target path.
-## Model inference
+## Inference
### Build llama.cpp
:::tip
-Follow [**llama.cpp**](../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) to build llama.cpp with **KleidiAI** enabled on Radxa Orion O6 / O6N.
+Follow [**llama.cpp**](../llama-cpp) to build `llama.cpp` with **KleidiAI** enabled on the Radxa Orion O6 / O6N.
:::
-Build commands:
+Build command:
@@ -191,7 +195,7 @@ cmake --build build --config Release
### Run inference
-Use `llama-cli` to chat with the model.
+Use `llama-cli` to chat with the model:
@@ -321,7 +325,7 @@ Use `llama-cli` to chat with the model.
-## Performance analysis
+## Performance benchmarking
You can use `llama-bench` to benchmark the model.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rknn_custom_yolo.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rknn_custom_yolo.mdx
index 69cfde289..3895d07fb 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rknn_custom_yolo.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_rknn_custom_yolo.mdx
@@ -1,6 +1,7 @@
-Many users can successfully run the YOLO model examples in the [RKNN Model Zoo](https://github.com/airockchip/rknn_model_zoo) repository, but they often don't know how to convert their own trained YOLO series models. The models used in [Deploying YOLOv5 Object Detection on Device](rknn_toolkit_lite2_yolov5) or the YOLO series models in [RKNN Model Zoo](https://github.com/airockchip/rknn_model_zoo) are ONNX models provided by Rockchip and pre-compiled RKNN models provided by Radxa. This document will explain how to compile your own trained YOLO models using RKNN and perform inference on the device.
+This document explains how to convert **your own trained YOLO models** to RKNN and run them on-device with a Rockchip NPU.
+Many users can run the YOLO demos in [RKNN Model Zoo](https://github.com/airockchip/rknn_model_zoo), but struggle when converting custom-trained models.
-This document applies to the following models:
+This guide applies to:
- ultralytics-yolov5
- yolov6
@@ -12,76 +13,57 @@ This document applies to the following models:
- YOLO-World
:::tip
-This document uses a yolo11n model trained on a custom dataset as an example, which is applied to person head detection in aerial vision. This tutorial focuses only on how to convert self-trained models, not on the accuracy of the training model itself.
+This guide uses a custom-trained **yolo11n** model as an example (head detection). The model and test data were provided by Radxa community user @[sanskarjainba-hub](https://github.com/radxa-docs/docs/issues/1081).
+:::
-This model is provided by Radxa community user @[sanskarjainba-hub](https://github.com/radxa-docs/docs/issues/1081)
+## Why custom models often fail to convert
-Known conditions for this example:
-
-- Model trained on a custom dataset
-- Model architecture is yolo11n
-- Model has only one label: "PERSON"
-- Test input data is provided
- :::
-
-## Differences Between Provided Models and Self-Trained Models
-
-First, we need to understand why users encounter errors when compiling their own trained YOLO models, why the accuracy decreases after quantization compilation, or how to handle post-processing.
-Before that, we need to know the differences between the provided model structure and the self-trained model structure. We use Netron to compare the input and output nodes of the ONNX models.
+The YOLO models shipped in RKNN Model Zoo have their in-model post-processing removed and expose a different output layout than a stock export of your own trained model.
+For example, the official YOLO11 model in RKNN Model Zoo outputs **multiple raw heads** (nine for yolo11n), while a typical custom export has **a single post-processed output head**.
-

- yolo11n model output structure from RKNN Model Zoo
+

+ Output layout of the YOLO11n model from RKNN Model Zoo
-

- Self-trained yolo11n model output structure
+

+ Output layout of a custom-trained YOLO11n model
-As we can see from the comparison, there are differences in the model output nodes. The yolo11 model downloaded from RKNN Model Zoo has 9 output heads, while the self-trained model has only 1 output head.
-
-### Key Model Differences
+Compared with a stock export of your own model, the optimized Model Zoo models typically differ as follows:
-- Modified output structure, removed post-processing structure (post-processing results are not quantization-friendly)
+- The in-model post-processing layers are removed (their outputs are not quantization-friendly).
+- The DFL (Distribution Focal Loss) / decode logic is moved to CPU-side post-processing, because DFL performs poorly on the NPU; moving it out usually improves overall performance.
+- Confidence-sum outputs are added to speed up threshold filtering during post-processing.
-- The DFL (Distribution Focal Loss) structure performs poorly on NPU, so it's moved to the post-processing stage outside the model. This operation can improve inference performance in most cases.
+Everything removed from the model must then be handled on the CPU; you can reuse the post-processing code from RKNN Model Zoo.
-- Added a confidence sum branch to the model output to accelerate threshold filtering during post-processing.
+## Model conversion approaches
-All the operations removed above need to be handled by the CPU externally. (The corresponding post-processing code can be found in RKNN_Model_Zoo)
+There are two practical approaches:
-## Model Conversion
+1. **FP16 / mixed quantization (keep your existing post-processing)**
-There are two approaches for model conversion:
+ - Fastest path, minimal code changes.
+ - Performance improvement is limited compared to a fully optimized INT8 pipeline.
-- FP16 floating-point conversion/automatic mixed quantization, keeping the original project's post-processing code (simplest approach, maintains original project code, but with limited performance improvement)
-- INT8 quantization conversion, modifying the model's post-processing structure to use RKNN Model Zoo's post-processing code (best performance, INT8 quantization with optimized post-processing)
+2. **INT8 quantization (use RKNN Model Zoo post-processing)**
+ - Best performance.
+ - Requires adjusting the model output structure and using external post-processing.
-### Model Preparation
+## Prepare the model and test input
-Prepare your own PyTorch format model. Here we use best.pt and a test image as an example.
+Prepare your trained PyTorch model (e.g. `best.pt`) and a test image.
-

+

Test image
-### Inference with PyTorch on CPU
+### Baseline: CPU inference with the PyTorch model
-First, we'll use the CPU on the device to run inference with the original PyTorch model to collect inference speed and accuracy data. We'll use the `ultralytics` tool for verification (for non-ultralytics YOLO models, please verify accordingly).
+Run a baseline with `ultralytics` on the device:
@@ -92,35 +74,25 @@ yolo predict model=best.pt source="../test_img/frame_00304.jpg"
+Example output:
+
```bash
-(.venv) rock@rock-5b-plus:~/ssd/rknn/rknn_model_zoo/examples/yolo11/model$ yolo predict model=best.pt source="../test_img/frame_00304.jpg"
-Ultralytics 8.3.231 🚀 Python-3.11.2 torch-2.9.1+cpu CPU (aarch64)
-YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs
-
-image 1/1 /mnt/ssd/rknn/rknn_model_zoo/examples/yolo11/model/../test_img/frame_00304.jpg: 384x640 3 PERSONs, 268.8ms
-Speed: 4.1ms preprocess, 268.8ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
-Results saved to /mnt/ssd/rknn/rknn_model_zoo/examples/yolo11/model/runs/detect/predict3
-💡 Learn more at https://docs.ultralytics.com/modes/predict
+image 1/1 ...: 384x640 3 PERSONs, 268.8ms
+Speed: ... 268.8ms inference ...
```
-The PyTorch model inference speed is **268.8ms**
+In this example, the PyTorch CPU inference time is **268.8 ms**.
-

- Inference result of best.pt
+

+ `best.pt` inference result
-### Converting FP16 Floating-Point Model
-
-If users do not wish to modify any project code and only want to migrate model inference to NPU, they can use the full FP16 model conversion.
+## Approach A: FP16 RKNN (minimal changes)
-#### Using Ultralytics for Conversion
+### Convert with Ultralytics (recommended for Ultralytics models)
-For models released by Ultralytics, you can use Ultralytics for model conversion. For detailed instructions, please refer to [RKNN Ultralytics YOLOv11](./rknn_ultralytics)
+If your model is an Ultralytics model, you can convert directly:
@@ -130,64 +102,48 @@ yolo export model=best.pt format=rknn name=rk3588
-The results are saved in ./best_rknn_model
+The result will be saved to `./best_rknn_model`.
-#### Inference with Ultralytics
+### Run inference with the FP16 RKNN model
-Copy the successfully converted best_rknn_model directory to the device, and use the ultralytics tool to directly perform NPU inference with the floating-point model
+Copy the `best_rknn_model` folder to the device and run:
```bash
-yolo predict model='./best_rknn_model' source='../test_img/frame_00304.jpg'
+yolo predict model="./best_rknn_model" source="../test_img/frame_00304.jpg"
```
+Example output:
+
```bash
-(.venv) rock@rock-5b-plus:~/ssd/rknn/rknn_model_zoo/examples/yolo11/model$ yolo predict model='./best_rknn_model' source='../test_img/frame_00304.jpg'
-WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.
-Ultralytics 8.3.231 🚀 Python-3.11.2 torch-2.9.1+cpu CPU (aarch64)
-Loading ./best_rknn_model for RKNN inference...
-W rknn-toolkit-lite2 version: 2.3.2
-I RKNN: [08:27:32.098] RKNN Runtime Information, librknnrt version: 2.3.2 (429f97ae6b@2025-04-09T09:09:27)
-I RKNN: [08:27:32.098] RKNN Driver Information, version: 0.9.8
-I RKNN: [08:27:32.098] RKNN Model Information, version: 6, toolkit version: 2.3.0(compiler version: 2.3.0 (c949ad889d@2024-11-07T11:39:30)), target: RKNPU v2, target platform: rk3588, framework name: ONNX, framework layout: NCHW, model inference type: static_shape
-W RKNN: [08:27:32.122] query RKNN_QUERY_INPUT_DYNAMIC_RANGE error, rknn model is static shape type, please export rknn with dynamic_shapes
-W Query dynamic range failed. Ret code: RKNN_ERR_MODEL_INVALID. (If it is a static shape RKNN model, please ignore the above warning message.)
-
-image 1/1 /mnt/ssd/rknn/rknn_model_zoo/examples/yolo11/model/../test_img/frame_00304.jpg: 640x640 3 PERSONs, 64.3ms
-Speed: 6.6ms preprocess, 64.3ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 640)
-Results saved to /mnt/ssd/rknn/rknn_model_zoo/examples/yolo11/model/runs/detect/predict4
-💡 Learn more at https://docs.ultralytics.com/modes/predict
+image 1/1 ...: 640x640 3 PERSONs, 64.3ms
+Speed: ... 64.3ms inference ...
```
-The floating-point RKNN model inference speed is **64.3ms**
+In this example, FP16 RKNN inference time is **64.3 ms**.
-

- Inference result of best-rk3588-fp.rknn
+

+ FP16 RKNN inference result
-#### Using Script Conversion
+### Convert with Model Zoo scripts (non-Ultralytics models)
-For non-Ultralytics YOLO models, there is a `python/convert.py` script in the corresponding YOLO model directory under rknn_model_zoo/examples. You can use this script to directly convert an ONNX model to an FP format RKNN model.
-After exporting the PyTorch model to an ONNX model, use the convert.py script and specify the quantization type as `fp`.
+If your model is not an Ultralytics export, RKNN Model Zoo provides `python/convert.py` scripts under the corresponding YOLO example directories.
+Export your model to ONNX first, then convert it to RKNN with the quantization type set to `fp`.
-For details, please refer to [Deploying YOLOv5 Object Detection on Device](rknn_toolkit_lite2_yolov5#pc-model-conversion)
+See: [Deploy YOLOv5 on the Device](rknn_toolkit_lite2_yolov5).
-### Converting INT8 Quantized Model
+## Approach B: INT8 RKNN (best performance)
-Since the post-processing results of YOLO models are not quantization-friendly, and some structures do not perform well on NPU, modifying the post-processing structure of the model is necessary to quantize it to INT8 type.
-The output structure needs to be modified to use the post-processing code in RKNN_Model_Zoo.
+Quantizing YOLO models to INT8 usually requires removing in-model post-processing and using external post-processing (Model Zoo style).
-Rockchip provides repositories for modifying the structure of different versions of YOLO models, and the usage is straightforward:
+Rockchip provides optimized conversion repos for different YOLO versions:
-| Model | Repository | README |
+| Model | Repo | README |
| ---------- | ------------------------------------------------ | -------------------------------------------------------------------------- |
| yolov5 | https://github.com/airockchip/yolov5 | https://github.com/airockchip/yolov5/blob/master/README_rkopt.md |
| yolov6 | https://github.com/airockchip/YOLOv6 | https://github.com/airockchip/YOLOv6/blob/main/deploy/RKNN/RKOPT_README.md |
@@ -198,296 +154,43 @@ Rockchip provides repositories for modifying the structure of different versions
| yolo11 | https://github.com/airockchip/ultralytics_yolo11 | https://github.com/airockchip/ultralytics_yolo11/blob/main/RKOPT_README.md |
| YOLO-World | https://github.com/airockchip/YOLO-World | https://github.com/airockchip/YOLO-World/blob/master/RKNN_README_EN.md |
-Continuing with the example of using a self-trained yolo11 model, we will convert the self-trained best.pt to an INT8 quantized RKNN model.
-
-#### Modifying the Model Structure
-
-Clone the corresponding model repository from the table:
-
-
-
-```bash
-git clone https://github.com/airockchip/ultralytics_yolo11.git && cd ultralytics_yolo11
-```
-
-
-
-Create a virtual environment:
-
-
-
-```bash
-python3 -m venv .venv
-source .venv/bin/activate
-pip3 install ultralytics onnx
-```
-
-
-
-Follow the README of the corresponding model in the table to modify the model structure and export the ONNX model. Here's an example for yolo11:
+Follow the README for your model family to export an optimized ONNX, then convert to INT8 RKNN with your calibration dataset.
-Modify the model path in `default.yaml`:
+Example (YOLO11):
```bash
-vim ./ultralytics/cfg/default.yaml
+git clone https://github.com/airockchip/ultralytics_yolo11.git
```
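+
+Once the optimized ONNX is exported, the INT8 conversion itself uses the Model Zoo `convert.py` for your model family together with a calibration dataset; a sketch for yolo11 (file names are illustrative):
+
+```bash
+cd rknn_model_zoo/examples/yolo11/python
+python3 convert.py best.onnx rk3588 i8 best.rknn
+```
+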
-
-

- Model path modification location
-
-
-Use the script to modify the structure and export the ONNX model:
-
-
-
-```bash
-export PYTHONPATH=./
-python3 ./ultralytics/engine/exporter.py
-```
-
-
-
-```bash
-(.venv) (base) zifeng@vms-max:~/Job/git_clone/rknn_yolo/ultralytics_yolo11$ python ./ultralytics/engine/exporter.py
-Ultralytics 8.3.9 🚀 Python-3.8.2 torch-2.4.1+cu121 CPU (Intel Core(TM) i9-14900KF)
-YOLO11n summary (fused): 238 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs
-
-PyTorch: starting from '/home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.pt' with input shape (16, 3, 640, 640) BCHW and output shape(s) ((16, 64, 80, 80), (16, 1, 80, 80), (16, 1, 80, 80), (16, 64, 40, 40), (16, 1, 40, 40), (16, 1, 40, 40), (16, 64, 20, 20), (16, 1, 20, 20), (16, 1, 20, 20)) (5.2 MB)
-
-RKNN: starting export with torch 2.4.1+cu121...
-
-RKNN: feed /home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.onnx to RKNN-Toolkit or RKNN-Toolkit2 to generate RKNN model.
-Refer https://github.com/airockchip/rknn_model_zoo/tree/main/examples/
-RKNN: export success ✅ 0.4s, saved as '/home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.onnx' (9.9 MB)
-
-Export complete (1.5s)
-Results saved to /mnt/sda1/customer/sanskarjainba-hub
-Predict: yolo predict task=detect model=/home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.onnx imgsz=640
-Validate: yolo val task=detect model=/home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.onnx imgsz=640 data=E:\Sanskar_Jain\APC\apc_lappy\OAK\Final_dataset\data.yaml
-Visualize: https://netron.app
-```
-
-
-

- Modified model structure
-
-
-#### Convert to INT8 RKNN Model
-
-Use `rknn_model_zoo/examples/yolo11/python/convert.py` to perform quantization and compilation of the model:
-
-
-
-```bash
-python3 convert.py best.onnx rk3588 i8 best.rknn
-```
-
-
-
-```bash
-(rknn) zifeng@vms-max:~/Job/git_clone/rknn_model_zoo/examples/yolo11/python$ python3 convert.py /home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.onnx rk3588 i8 /home/zifeng/Job/sda1/customer/sanskarjainba-hub/best.rknn
-I rknn-toolkit2 version: 2.3.2
---> Config model
-done
---> Loading model
-I Loading : 100%|█████████████████████████████████████████████| 174/174 [00:00<00:00, 114300.53it/s]
-done
---> Building model
-I OpFusing 0: 100%|█████████████████████████████████████████████| 100/100 [00:00<00:00, 1704.81it/s]
-I OpFusing 1 : 100%|█████████████████████████████████████████████| 100/100 [00:00<00:00, 721.25it/s]
-I OpFusing 0 : 100%|█████████████████████████████████████████████| 100/100 [00:00<00:00, 379.96it/s]
-I OpFusing 1 : 100%|█████████████████████████████████████████████| 100/100 [00:00<00:00, 363.89it/s]
-I OpFusing 2 : 100%|█████████████████████████████████████████████| 100/100 [00:00<00:00, 205.95it/s]
-W build: found outlier value, this may affect quantization accuracy
- const name abs_mean abs_std outlier value
- model.0.conv.weight 2.91 2.62 -16.472
- model.23.cv3.0.0.0.conv.weight 0.33 0.62 -10.566
-I GraphPreparing : 100%|███████████████████████████████████████| 220/220 [00:00<00:00, 29180.54it/s]
-I Quantizating : 100%|████████████████████████████████████████████| 220/220 [00:03<00:00, 70.28it/s]
-W build: The default input dtype of 'images' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '462' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of 'onnx::ReduceSum_476' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '480' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '487' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of 'onnx::ReduceSum_501' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '505' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '512' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of 'onnx::ReduceSum_526' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-W build: The default output dtype of '530' is changed from 'float32' to 'int8' in rknn model for performance!
- Please take care of this change when deploy rknn model with Runtime API!
-I rknn building ...
-I rknn building done.
-done
---> Export rknn model
-done
-```
-
-#### Modify the example code in RKNN_Model_Zoo
-
-- Modify the code in `rknn_model_zoo/py_utils/rknn_executor.py`, **please back up the original code**
-
- Please configure the RKNN Model Zoo code repository according to [Optional: Install RKNN Model Zoo on the board](./rknn_install#Optional-Install-RKNN-Model-Zoo-on-the-board)
-
-
-
- ```python
- from rknnlite.api import RKNNLite as RKNN
-
- class RKNN_model_container():
- def __init__(self, model_path, target=None, device_id=None) -> None:
- rknn = RKNN()
- rknn.load_rknn(model_path)
- ret = rknn.init_runtime()
- self.rknn = rknn
-
- def run(self, inputs):
- if self.rknn is None:
- print("ERROR: rknn has been released")
- return []
-
- if isinstance(inputs, list) or isinstance(inputs, tuple):
- pass
- else:
- inputs = [inputs]
-
- result = self.rknn.inference(inputs=inputs)
-
- return result
-
- def release(self):
- self.rknn.release()
- self.rknn = None
- ```
-
-
-
-- Modify line 262 in `rknn_model_zoo/examples/yolo11/python/yolo11.py` (**please back up the original code**)
-
-
-
- ```python
- 262 outputs = model.run([np.expand_dims(input_data, 0)])
- ```
-
-
-
-- Modify `CLASSES` and `coco_id_list`
-
- Update the `CLASSES` variable according to the categories in your trained model:
-
-
-
- ```python
- CLASSES = ["person"]
- ```
-
-
-
-#### Inference with ONNX Model
-
-Copy the modified ONNX model to the device and use the RKNN Model Zoo code for inference to verify if there are any issues with the modified model structure.
-
-
-
-```bash
-cd rknn_model_zoo/examples/yolo11/python
-python3 yolo11.py --model_path ../model/best.onnx --img_folder ../test_img/ --img_save
-```
-
-
-
-```bash
-(.venv) rock@rock-5b-plus:~/ssd/rknn/rknn_model_zoo/examples/yolo11/python$ python3 yolo11.py --model_path ../model/best.onnx --img_folder ../test_img/ --img_save
-2025-11-25 10:27:21.165203468 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
-/mnt/ssd/rknn/rknn_model_zoo/py_utils/onnx_executor.py:12: FutureWarning: In the future `np.bool` will be defined as the corresponding NumPy scalar.
- if getattr(np, 'bool', False):
-Model-../model/best.onnx is onnx model, starting val
-WARNING: reshape inputdata-0: from (1, 3, 640, 640) to [1, 3, 640, 640]
-112.08ms
-
-IMG: frame_00304.jpg
-PERSON @ (534 389 739 548) 0.729
-PERSON @ (535 244 741 390) 0.336
-PERSON @ (205 86 350 162) 0.256
-Detection result save to ./result/frame_00304.jpg
-```
-
-The CPU inference time for the modified ONNX model is **112.08 ms**, and the results are correct.
+After converting, run inference on-device using RKNN Model Zoo-style post-processing.

- Inference result of best.onnx
+ ONNX inference result (after structure changes)
-#### Inference with INT8 RKNN Model
-
-
-
-```bash
-cd rknn_model_zoo/examples/yolo11/python
-python3 yolo11.py --model_path ./best.rknn --img_folder ./test_img --img_save
-```
-
-
-
-```bash
-(.venv) rock@rock-5b-plus:~/ssd/rknn/rknn_model_zoo/examples/yolo11/python$ python3 yolo11.py --model_path ../model/best.onnx --img_folder ../test_img/ --img_save
-2025-11-25 10:27:21.165203468 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
-/mnt/ssd/rknn/rknn_model_zoo/py_utils/onnx_executor.py:12: FutureWarning: In the future `np.bool` will be defined as the corresponding NumPy scalar.
- if getattr(np, 'bool', False):
-Model-../model/best.onnx is onnx model, starting val
-WARNING: reshape inputdata-0: from (1, 3, 640, 640) to [1, 3, 640, 640]
-18.74ms
-IMG: frame_00304.jpg
-PERSON @ (206 85 349 160) 0.341
-PERSON @ (534 242 740 400) 0.327
-Detection result save to ./result/frame_00304.jpg
-```
-
-The NPU inference time for the INT8 quantized RKNN model is **18.74 ms**, and the results are consistent with the ONNX model.
-

- Inference result of best.rknn
+ INT8 RKNN inference result
-## Inference Performance Comparison
+## Performance summary (example)
-| Model | Type | Backend | Time |
-| -------------- | ---- | ------- | --------- |
-| best.pt | fp32 | CPU | 268.8 ms |
-| best.onnx | fp32 | CPU | 112.08 ms |
-| best_fp.rknn | fp16 | NPU | 64.3 ms |
-| best_int8.rknn | INT8 | NPU | 18.74 ms |
+| Model | Type | Backend | Time |
+| ---------------- | ---- | ------- | --------- |
+| `best.pt` | FP32 | CPU | 268.8 ms |
+| `best.onnx` | FP32 | CPU | 112.08 ms |
+| `best_fp.rknn` | FP16 | NPU | 64.3 ms |
+| `best_int8.rknn` | INT8 | NPU | 18.74 ms |
-From the table data, we can conclude that the self-trained yolo11n model's inference performance on ROCK 5B+ improved from 268.8ms to 18.74ms after porting to NPU, achieving a 14x performance improvement while maintaining the same recognition accuracy.
+In this example, moving a custom YOLO11n model from CPU to NPU improves inference time from **268.8 ms** to **18.74 ms** (~14× speedup) while keeping comparable detection results.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_stable_diffusion_convert.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_stable_diffusion_convert.mdx
index 13a76bfe5..236f264ea 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_stable_diffusion_convert.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/_stable_diffusion_convert.mdx
@@ -1,21 +1,20 @@
-Stable Diffusion is a text-to-image generation model based on latent diffusion. By progressively adding and removing noise in latent space, it turns random noise into images that match a text prompt.
-In recent years, Stable Diffusion has continued to evolve, with many optimized variants emerging in the open-source community, significantly improving generation quality, speed, and computational efficiency.
-This demo uses **Stable Diffusion LCM Dreamshaper V7**, a lightweight variant that combines Latent Consistency Model (LCM) acceleration to maintain high-quality image generation with very few inference steps, producing clear images in just **4 steps**.
-This guide walks through how to deploy the model to the NPU on Rockchip chips using the RKNN toolchain, enabling efficient, low-latency on-device image generation.
+Stable Diffusion is a text-to-image generation model based on latent diffusion. It gradually adds and removes noise in latent space to turn random noise into images that match a text prompt.
+In recent years, Stable Diffusion has evolved rapidly, with many community-optimized variants that improve quality, speed, and efficiency.
+This guide uses **Stable Diffusion LCM Dreamshaper V7**, a lightweight variant that applies Latent Consistency Model (LCM) acceleration to generate high-quality images with very few steps (as few as **4 steps**).
+This document shows how to deploy the model to the NPU on Rockchip SoCs using the RKNN toolchain for efficient, low-latency on-device generation.
:::tip
-This document uses RK3588 and Dreamshaper V7 as an example to demonstrate how to deploy a text-to-image model on the Rockchip NPU.
-You need to set up the RKNN-related environment on your PC in advance.
-For detailed environment setup steps, refer to [RKNN Installation](./rknn_install).
+This document uses RK3588 and Dreamshaper V7 as an example.
+You need to set up the RKNN environment on your PC first. See [RKNN Installation](./rknn-install).
:::
-## Download model files
+## Model Download
-**Radxa provides converted RKNN models and executables with an output resolution of 256x256. You can download and use them directly by following the steps below.**
+**Radxa provides pre-converted RKNN models and runnable files (output resolution: 256×256). You can download and use them directly:**
-- **Download model files using modelscope**
+- **Download the model files using `modelscope`**
- - **Create a directory to store the model files**
+ - **Create a directory for the model files**
@@ -25,18 +24,18 @@ mkdir sd-lcm-rknn && cd sd-lcm-rknn
-- **Install the modelscope package via pip**
+- **Install `modelscope` via pip**
```bash
-# Use a recent Python version to avoid compatibility issues
+# Use a recent Python version to avoid compatibility issues.
pip3 install modelscope
```
-- **Download the Stable-Diffusion-LCM_RKNN repository**
+- **Download the `Stable-Diffusion-LCM_RKNN` package**
@@ -46,13 +45,13 @@ modelscope download --model radxa/Stable-Diffusion-LCM_RKNN
-## (Optional) Convert the model
+## Model Conversion (Optional)
-**If you want to set a custom output resolution, you can convert the model yourself. Follow the steps below.**
+**If you want a different output resolution, you can convert the model yourself:**
-- **Download the ONNX model from HuggingFace and convert it to an RKNN model**
+- **Download the ONNX model from Hugging Face and convert it to RKNN**
- - **Create a directory to store the model files**
+ - **Create a directory for the model files**
@@ -67,7 +66,7 @@ mkdir sd-lcm-rknn && cd sd-lcm-rknn
```bash
-# Install git lfs if it is not already available
+# Requires git lfs. Install it first if needed.
git lfs install
git clone https://huggingface.co/thanhtantran/Stable-Diffusion-1.5-LCM-ONNX-RKNN2
```
@@ -84,27 +83,30 @@ conda activate your_rknn_env
-- **Optionally run _run_onnx-lcm.py_ to validate the model**
+- **Optionally run `run_onnx-lcm.py` to verify the ONNX model**
```bash
+# Use -h to view help.
python run_onnx-lcm.py -i ./model -o ./images --prompt "Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style."
```
-- **Run _convert-onnx-to-rknn.py_ to convert the model**
+- **Run `convert-onnx-to-rknn.py` to convert the model**
```bash
+# Use -h to view help. Replace N with your desired resolution.
+# The converted model will only output at that resolution.
python convert-onnx-to-rknn.py -i ./model -r NxN
```
-- **Organize the files into the following directory structure, then proceed to the next step**
+- **Arrange files in the following directory layout**
```txt
---sd-lcm-rknn
@@ -123,11 +125,11 @@ python convert-onnx-to-rknn.py -i ./model -r NxN
---run_rknn-lcm.py
```
-## Deploy on the device
+## On-device Deployment
-- **Copy the converted RKNN models and executables to the device**
+- **Copy the RKNN models and runtime files to the device**
- - **Enter the corresponding directory on the device**
+ - **Enter the directory on the device**
@@ -172,24 +174,24 @@ pip3 install diffusers pillow "numpy<2.0" torch transformers rknn-toolkit-lite2
```bash
-# Use -h to view help. If you converted the model yourself, update the resolution argument accordingly.
+# Use -h to view help. If you converted the model yourself, adjust the resolution accordingly.
python ./run_rknn-lcm.py -i ./model -o ./images -s 256x256 --prompt "Majestic mountain landscape with snow-capped peaks, autumn foliage in vibrant reds and oranges, a turquoise river winding through a valley, crisp and serene atmosphere, ultra-realistic style."
```
-## Results and performance
+## Results and Performance
-- **256x256 output image on the device**
+- **Example output (256×256 on-device)**

-- **Single-run performance (for reference only):**
+- **Single-run timing (for reference only):**
```txt
-Text encoder load time: Took 0.7 seconds.
-UNet load time: Took 2.8 seconds.
-VAE decoder load time: Took 0.4 seconds.
+text_encoder load time: Took 0.7 seconds.
+unet load time: Took 2.8 seconds.
+vae_decoder load time: Took 0.4 seconds.
Prompt encoding time: 0.08s
Inference time: 4.55s
Decode time: 3.15s
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_clip.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_clip.mdx
new file mode 100644
index 000000000..97461bbf6
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_clip.mdx
@@ -0,0 +1,194 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/clip/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/images/
+python convert.py ../../model/clip_images.onnx ${TARGET_PLATFORM}
+cd ../text/
+python convert.py ../../model/clip_text.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d clip
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_clip_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_clip_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_clip_demo ./model/clip_images.rknn ./model/dog_224x224.jpg ./model/clip_text.rknn ./model/text.txt
+```
+
+
+
+```bash
+$ ./rknn_clip_demo ./model/clip_images.rknn ./model/dog_224x224.jpg ./model/clip_text.rknn ./model/text.txt
+--> init clip image model
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=pixel_values, n_dims=4, dims=[1, 224, 224, 3], n_elems=150528, size=301056, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=image_embeds, n_dims=2, dims=[1, 512], n_elems=512, size=1024, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model is NHWC input fmt
+input image height=224, input image width=224, input image channel=3
+--> init clip text model
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input_ids, n_dims=2, dims=[1, 20], n_elems=20, size=160, fmt=UNDEFINED, type=INT64, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=text_embeds, n_dims=2, dims=[1, 512], n_elems=512, size=1024, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model is UNDEFINED input fmt
+input text batch size=1, input sequence length=20
+origin size=224x224 crop size=224x224
+input image: 224 x 224, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+num_lines=2
+--> inference clip image model
+rga_api version 1.10.1_[0]
+rknn_run
+--> inference clip text model
+rknn_run
+rknn_run
+--> rknn clip demo result
+images: ./model/dog_224x224.jpg
+text : a photo of a dog
+score : 0.989
+```
+
+### Test Image
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python clip.py --img_model ../model/clip_images.rknn --text_model ../model/clip_text.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python clip.py --img_model ../model/clip_images.rknn --text_model ../model/clip_text.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+--> rknn clip demo result:
+images: ../model/dog_224x224.jpg
+text : a photo of dog
+score : 0.990
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_deeplabv3.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_deeplabv3.mdx
new file mode 100644
index 000000000..f4ceaa96d
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_deeplabv3.mdx
@@ -0,0 +1,216 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/deeplabv3/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+:::info
+If a model loading error is reported, install the TensorFlow library with the following command:
+
+```bash
+pip3 install 'tensorflow>=1.12.0,<=2.16.0rc0'
+```
+
+:::
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/deeplab-v3-plus-mobilenet-v2.pb ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d deeplabv3
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_deeplabv3_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+:::info
+**Dependency note**: The C API implementation of this example requires the libOpenCL.so library. On the RK3588/RK356X platforms, the libmali.so.1.9.0 library shipped with the Mali GPU driver can be used instead.
+:::
+
+On Debian 12 (Bookworm), RK3588/RK356X devices use the Panfrost/Panthor GPU driver by default, so you need to switch to the Mali GPU driver first.
+
+> Reference: [Switching the GPU Driver](../radxa-os/mali-gpu)
+
+Create a symbolic link (link libmali.so.1.9.0 as libOpenCL.so).
+
+
+
+```bash
+cd rknn_deeplabv3_demo/lib/
+ln -s /usr/lib/aarch64-linux-gnu/libmali.so.1.9.0 libOpenCL.so
+```
+
+
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd ..
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_deeplabv3_demo ./model/deeplab-v3-plus-mobilenet-v2.rknn ./model/test_image.jpg
+```
+
+
+
+```bash
+$ ./rknn_deeplabv3_demo ./model/deeplab-v3-plus-mobilenet-v2.rknn ./model/test_image.jpg
+arm_release_ver: g24p0-00eac0, rk_so_ver: 3
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=sub_7:0, n_dims=4, dims=[1, 513, 513, 3], n_elems=789507, size=789507, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=0, scale=0.007843
+output tensors:
+ index=0, name=logits/semantic/BiasAdd:0, n_dims=4, dims=[1, 65, 65, 21], n_elems=88725, size=88725, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-109, scale=0.100937
+model is NHWC input fmt
+model input height=513, width=513, channel=3
+origin size=513x513 crop size=512x512
+input image: 513 x 513, subsampling: 4:4:4, colorspace: YCbCr, orientation: 1
+model is NHWC input fmt
+output_mems-> fd = 12, offset = 0, size = 354900
+post_buf_mem-> fd = 13, offset = 0, size = 263169
+rknn_run
+write_image path: out.png width=513 height=513 channel=3 data=0x33a04740
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+:::info
+**Dependency note**: The Python API implementation of this example depends on the Matplotlib library. Install it with the following command.
+
+```bash
+pip install matplotlib
+```
+
+:::
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python deeplabv3.py --model_path ../model/deeplab-v3-plus-mobilenet-v2.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python deeplabv3.py --model_path ../model/deeplab-v3-plus-mobilenet-v2.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+--> Running model
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+--> done
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_lprnet.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_lprnet.mdx
new file mode 100644
index 000000000..efa4f26b7
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_lprnet.mdx
@@ -0,0 +1,173 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/LPRNet/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/lprnet.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d LPRNet
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_lprnet_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_lprnet_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_lprnet_demo ./model/lprnet.rknn ./model/test.jpg
+```
+
+
+
+```bash
+$ ./rknn_lprnet_demo ./model/lprnet.rknn ./model/test.jpg
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input, n_dims=4, dims=[1, 24, 94, 3], n_elems=6768, size=6768, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=0, scale=0.007843
+output tensors:
+ index=0, name=output, n_dims=3, dims=[1, 68, 18], n_elems=1224, size=1224, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=50, scale=0.643529
+model is NHWC input fmt
+model input height=24, width=94, channel=3
+origin size=94x24 crop size=80x16
+input image: 94 x 24, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+rknn_run
+Plate recognition result: 湘F6CL03
+```
+
+### Test Image
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python lprnet.py --model_path ../model/lprnet.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python lprnet.py --model_path ../model/lprnet.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+done
+rk3588
+--> Init runtime environment
+I target set by user is: rk3588
+done
+--> Running model
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+--> PostProcess
+Plate recognition result: 湘F6CL03
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilenet.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilenet.mdx
new file mode 100644
index 000000000..c40bf6cdd
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilenet.mdx
@@ -0,0 +1,161 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/mobilenet/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+:::tip
+After running the conversion script, you can see the model output.
+:::
+
+
+
+```bash
+cd ../python/
+python mobilenet.py --model ../model/mobilenetv2-12.onnx --target ${TARGET_PLATFORM}
+```
+
+
+
+Model output:
+
+```bash
+-----TOP 5-----
+[494] score=0.98 class="n03017168 chime, bell, gong"
+[653] score=0.01 class="n03764736 milk can"
+[469] score=0.00 class="n02939185 caldron, cauldron"
+[505] score=0.00 class="n03063689 coffeepot"
+[463] score=0.00 class="n02909870 bucket, pail"
+```
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d mobilenet
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_mobilenet_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_mobilenet_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_mobilenet_demo ./model/mobilenet_v2.rknn ./model/bell.jpg
+```
+
+
+
+```bash
+$ ./rknn_mobilenet_demo ./model/mobilenet_v2.rknn ./model/bell.jpg
+num_lines=1001
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input, n_dims=4, dims=[1, 224, 224, 3], n_elems=150528, size=150528, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
+output tensors:
+ index=0, name=output, n_dims=2, dims=[1, 1000, 0, 0], n_elems=1000, size=1000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-55, scale=0.141923
+model is NHWC input fmt
+model input height=224, width=224, channel=3
+origin size=500x333 crop size=496x320
+input image: 500 x 333, subsampling: 4:4:4, colorspace: YCbCr, orientation: 1
+src width is not 4/16-aligned, convert image use cpu
+finish
+rknn_run
+[494] score=0.993227 class=n03017168 chime, bell, gong
+[469] score=0.002560 class=n02939185 caldron, cauldron
+[747] score=0.000466 class=n04023962 punching bag, punch bag, punching ball, punchball
+[792] score=0.000466 class=n04208210 shovel
+[618] score=0.000405 class=n03633091 ladle
+```
+
+This output indicates the model classifies this image as **bell, gong**.
+
+### Test Image
+
+
+
+## Python API
+
+This example does not provide a standalone Python API script; you can implement one yourself.
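+
+If you want a starting point, the following is a minimal sketch of such a script based on `rknn-toolkit-lite2` (`RKNNLite`). The file paths, preprocessing, and top-5 printout are illustrative assumptions, not part of the official example.
+
+```python
+# Minimal sketch, assuming rknn-toolkit-lite2 is installed on the device and
+# mobilenet_v2.rknn plus a test image exist at the (hypothetical) paths below.
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+
+MODEL_PATH = "./model/mobilenet_v2.rknn"  # hypothetical path
+IMAGE_PATH = "./model/bell.jpg"           # hypothetical path
+
+rknn = RKNNLite()
+rknn.load_rknn(MODEL_PATH)
+rknn.init_runtime()
+
+# Read the image, convert BGR -> RGB, and resize to the 224x224 model input.
+img = cv2.imread(IMAGE_PATH)
+img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+img = cv2.resize(img, (224, 224))
+
+# NHWC batch of 1; the quantization parameters are embedded in the RKNN model.
+outputs = rknn.inference(inputs=[np.expand_dims(img, 0)])
+
+# Print the top-5 class indices by score.
+scores = outputs[0].flatten()
+for idx in scores.argsort()[-5:][::-1]:
+    print(f"[{idx}] score={scores[idx]:.6f}")
+
+rknn.release()
+```
+
+The printed indices follow the same ImageNet class order as the C API demo output above.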
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilesam.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilesam.mdx
new file mode 100644
index 000000000..0a0921772
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_mobilesam.mdx
@@ -0,0 +1,201 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/mobilesam/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+:::info
+Currently, the MobileSAM model only supports the rk3562 and rk3588 platforms.
+:::
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/decoder/
+python convert.py ../../model/mobilesam_decoder.onnx ${TARGET_PLATFORM}
+cd ../encoder/
+python convert.py ../../model/mobilesam_encoder.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d mobilesam
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_mobilesam_demo user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_mobilesam_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_mobilesam_demo ./model/mobilesam_encoder.rknn ./model/picture.jpg ./model/mobilesam_decoder.rknn ./model/coords.txt ./model/labels.txt
+```
+
+
+
+```bash
+$ ./rknn_mobilesam_demo ./model/mobilesam_encoder.rknn ./model/picture.jpg ./model/mobilesam_decoder.rknn ./model/coords.txt ./model/labels.txt
+--> init mobilesam encoder model
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input.1, n_dims=4, dims=[1, 448, 448, 3], n_elems=602112, size=1204224, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=2044, n_dims=4, dims=[1, 256, 28, 28], n_elems=200704, size=401408, fmt=NCHW, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model is NHWC input fmt
+input image height=448, input image width=448, input image channel=3
+--> init mobilesam decoder model
+model input num: 5, output num: 2
+input tensors:
+ index=0, name=image_embeddings, n_dims=4, dims=[1, 28, 28, 256], n_elems=200704, size=401408, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=1, name=point_coords, n_dims=3, dims=[1, 2, 2], n_elems=4, size=8, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=2, name=point_labels, n_dims=2, dims=[1, 2], n_elems=2, size=4, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=3, name=mask_input, n_dims=4, dims=[1, 112, 112, 1], n_elems=12544, size=25088, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=4, name=has_mask_input, n_dims=1, dims=[1], n_elems=1, size=2, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=iou_predictions, n_dims=2, dims=[1, 4], n_elems=4, size=8, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=1, name=low_res_masks, n_dims=4, dims=[1, 4, 112, 112], n_elems=50176, size=100352, fmt=NCHW, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model is NHWC input fmt
+input image height=28, input image width=28, input image channel=256
+origin size=769x770 crop size=768x768
+input image: 769 x 770, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+num_lines=2
+num_lines=2
+--> inference mobilesam encoder model
+src_height:770, src_width:769
+newh:448 neww:447 padh:0 padw:1
+rknn_run
+--> inference mobilesam decoder model
+rknn_run
+write_image path: out.png width=769 height=770 channel=3 data=0xffff79479010
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python mobilesam.py --encoder ../model/mobilesam_encoder.rknn --decoder ../model/mobilesam_decoder.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python mobilesam.py --encoder ../model/mobilesam_encoder.rknn --decoder ../model/mobilesam_decoder.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+[ WARN:0@3.457] global loadsave.cpp:848 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.
+result save to result.jpg
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppocr.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppocr.mdx
new file mode 100644
index 000000000..8e81b6d96
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppocr.mdx
@@ -0,0 +1,248 @@
+PP-OCR is a flexible OCR solution that supports both standalone use of the detection and recognition modules and end-to-end system integration.
+This example demonstrates how to use the compute resources of the Rockchip platform to deploy and run this high-performance "image in, text out" text recognition system.
+
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/PPOCR/PPOCR-Det/model/
+bash download_model.sh
+cd ../../PPOCR-Rec/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python
+python convert.py ../model/ppocrv4_rec.onnx ${TARGET_PLATFORM}
+cd ../../PPOCR-Det/python/
+python convert.py ../model/ppocrv4_det.onnx ${TARGET_PLATFORM}
+```
+
+
+
+Copy the converted models to the PPOCR-System/model directory.
+
+
+
+```bash
+cd ../../PPOCR-System/model/
+cp ../../PPOCR-Det/model/ppocrv4_det.rknn ./
+cp ../../PPOCR-Rec/model/ppocrv4_rec.rknn ./
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d PPOCR-System
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_PPOCR-System_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_PPOCR-System_demo
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_ppocr_system_demo ./model/ppocrv4_det.rknn ./model/ppocrv4_rec.rknn ./model/test.jpg
+```
+
+
+
+```bash
+$ ./rknn_ppocr_system_demo ./model/ppocrv4_det.rknn ./model/ppocrv4_rec.rknn ./model/test.jpg
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=x, n_dims=4, dims=[1, 480, 480, 3], n_elems=691200, size=691200, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
+output tensors:
+ index=0, name=sigmoid_0.tmp_0, n_dims=4, dims=[1, 1, 480, 480], n_elems=230400, size=230400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+model is NHWC input fmt
+model input height=480, width=480, channel=3
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=x, n_dims=4, dims=[1, 48, 320, 3], n_elems=46080, size=92160, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=softmax_11.tmp_0, n_dims=3, dims=[1, 40, 6625, 0], n_elems=265000, size=530000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model is NHWC input fmt
+model input height=48, width=320, channel=3
+origin size=500x500 crop size=496x496
+input image: 500 x 500, subsampling: 4:4:4, colorspace: YCbCr, orientation: 1
+src width is not 4/16-aligned, convert image use cpu
+finish
+DRAWING OBJECT
+[0] @ [(28, 37), (302, 39), (301, 71), (27, 69)]
+recognize result: 纯臻营养护发素, score=0.711077
+[1] @ [(26, 82), (172, 82), (172, 104), (26, 104)]
+recognize result: 产品信息/参数, score=0.709612
+[2] @ [(27, 112), (332, 112), (332, 134), (27, 134)]
+recognize result: (45元/每公斤,100公斤起订), score=0.691406
+[3] @ [(28, 142), (282, 144), (281, 163), (27, 162)]
+recognize result: 每瓶22元,1000瓶起订), score=0.706613
+[4] @ [(25, 179), (298, 177), (300, 194), (26, 195)]
+recognize result: 【品牌】:代加工方式/OEMODM, score=0.704963
+[5] @ [(26, 209), (234, 209), (234, 228), (26, 228)]
+recognize result: 【品名】:纯臻营养护发素, score=0.710124
+[6] @ [(26, 240), (241, 240), (241, 259), (26, 259)]
+recognize result: 【产品编号】:YM-X-3011, score=0.703522
+[7] @ [(413, 233), (429, 233), (429, 305), (413, 305)]
+recognize result: ODMOEM, score=0.708415
+[8] @ [(25, 270), (179, 270), (179, 289), (25, 289)]
+recognize result: 【净含量】:220ml, score=0.707519
+[9] @ [(26, 303), (252, 303), (252, 321), (26, 321)]
+recognize result: 【适用人群】:适合所有肤质, score=0.709698
+[10] @ [(26, 333), (341, 333), (341, 351), (26, 351)]
+recognize result: 【主要成分】:鲸蜡硬脂醇、燕麦β-葡聚, score=0.689684
+[11] @ [(27, 363), (283, 365), (282, 384), (26, 382)]
+recognize result: 糖、椰油酰胺丙基甜菜碱、泛酸, score=0.691807
+[12] @ [(368, 368), (476, 368), (476, 388), (368, 388)]
+recognize result: (成品包材), score=0.706706
+[13] @ [(27, 394), (362, 396), (361, 414), (26, 413)]
+recognize result: 【主要功能】:可紧致头发磷层,从而达到, score=0.696854
+[14] @ [(27, 428), (371, 428), (371, 446), (27, 446)]
+recognize result: 即时持久改善头发光泽的效果,给干燥的头, score=0.711040
+[15] @ [(27, 459), (136, 459), (136, 478), (27, 478)]
+recognize result: 发足够的滋养, score=0.711344
+ SAVE TO ./out.jpg
+write_image path: ./out.jpg width=500 height=500 channel=3 data=0x2bf82010
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+:::info
+**Dependency note**: Run the following command to install the dependencies.
+
+```bash
+pip install shapely pyclipper
+```
+
+:::
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python ppocr_system.py --det_model_path ../model/ppocrv4_det.rknn --rec_model_path ../model/ppocrv4_rec.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python ppocr_system.py --det_model_path ../model/ppocrv4_det.rknn --rec_model_path ../model/ppocrv4_rec.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Init runtime environment
+I target set by user is: rk3588
+done
+Model-../model/ppocrv4_det.rknn is rknn model, starting val
+I rknn-toolkit2 version: 2.3.2
+--> Init runtime environment
+I target set by user is: rk3588
+done
+Model-../model/ppocrv4_rec.rknn is rknn model, starting val
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+[[('纯臻营养护发素', 0.7113560438156128)], [('产品信息/参数', 0.7074497938156128)], [('(45元/每公斤,100公斤起订)', 0.6900849938392639)], [('每瓶22元,1000瓶起订)', 0.7073799967765808)], [('【品牌】:代加工方式/OEMODM', 0.7077493071556091)], [('【品名】:纯臻营养护发素', 0.7105305790901184)], [('【产品编号】:YM-X-3011', 0.705413818359375)], [('ODM OEM', 0.6839424967765808)], [('【净含量】:220ml', 0.7086736559867859)], [('【适用人群】:适合所有肤质', 0.7099984884262085)], [('【主要成分】:鲸蜡硬脂醇、燕麦β-葡聚', 0.6929739117622375)], [('糖、椰油酰胺丙基甜菜碱、泛酸', 0.6709420084953308)], [('(成品包材)', 0.708251953125)], [('【主要功能】:可紧致头发磷层,从而达到', 0.7064401507377625)], [('即时持久改善头发光泽的效果,给干燥的头', 0.7103207111358643)], [('发足够的滋养', 0.7110188603401184)]]
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppyoloe.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppyoloe.mdx
new file mode 100644
index 000000000..ebff4b02f
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_ppyoloe.mdx
@@ -0,0 +1,209 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/ppyoloe/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/ppyoloe_s.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d ppyoloe
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_ppyoloe_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_ppyoloe_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_ppyoloe_demo ./model/ppyoloe.rknn ./model/bus.jpg
+```
+
+
+
+```bash
+$ ./rknn_ppyoloe_demo ./model/ppyoloe.rknn ./model/bus.jpg
+load label ./model/coco_80_labels_list.txt
+model input num: 1, output num: 9
+input tensors:
+ index=0, name=image, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+output tensors:
+ index=0, name=conv2d_176.tmp_1, n_dims=4, dims=[1, 68, 20, 20], n_elems=27200, size=27200, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-42, scale=0.072882
+ index=1, name=sigmoid_2.tmp_0, n_dims=4, dims=[1, 80, 20, 20], n_elems=32000, size=32000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003802
+ index=2, name=clip_0.tmp_0, n_dims=4, dims=[1, 1, 20, 20], n_elems=400, size=400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+ index=3, name=conv2d_182.tmp_1, n_dims=4, dims=[1, 68, 40, 40], n_elems=108800, size=108800, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-47, scale=0.085614
+ index=4, name=sigmoid_5.tmp_0, n_dims=4, dims=[1, 80, 40, 40], n_elems=128000, size=128000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003736
+ index=5, name=clip_1.tmp_0, n_dims=4, dims=[1, 1, 40, 40], n_elems=1600, size=1600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+ index=6, name=conv2d_188.tmp_1, n_dims=4, dims=[1, 68, 80, 80], n_elems=435200, size=435200, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-43, scale=0.102416
+ index=7, name=sigmoid_8.tmp_0, n_dims=4, dims=[1, 80, 80, 80], n_elems=512000, size=512000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003279
+ index=8, name=clip_2.tmp_0, n_dims=4, dims=[1, 1, 80, 80], n_elems=6400, size=6400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+model is NHWC input fmt
+model input height=640, width=640, channel=3
+origin size=640x640 crop size=640x640
+input image: 640 x 640, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+scale=1.000000 dst_box=(0 0 639 639) allow_slight_change=1 _left_offset=0 _top_offset=0 padding_w=0 padding_h=0
+rga_api version 1.10.1_[0]
+rknn_run
+person @ (108 232 224 536) 0.943
+person @ (478 232 561 519) 0.928
+person @ (211 240 283 512) 0.916
+bus @ (88 135 552 442) 0.909
+person @ (78 326 125 516) 0.527
+handbag @ (261 339 281 413) 0.420
+handbag @ (253 342 264 380) 0.272
+write_image path: out.png width=640 height=640 channel=3 data=0x6b49a30
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python ppyoloe.py --model_path ./model/ppyoloe.rknn --target ${TARGET_PLATFORM} --img_folder ./model/ --img_save
+```
+
+
+
+```bash
+$ python ppyoloe.py --model_path ./model/ppyoloe.rknn --target rk3588 --img_folder ./model/ --img_save
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Init runtime environment
+I target set by user is: rk3588
+done
+Model-./model/ppyoloe.rknn is rknn model, starting val
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+
+IMG: bus.jpg
+class: person, score: 0.942828357219696
+box coordinate left,top,right,down: [108, 232, 224, 536]
+class: person, score: 0.9276214241981506
+box coordinate left,top,right,down: [478, 232, 561, 519]
+class: person, score: 0.916216254234314
+box coordinate left,top,right,down: [211, 240, 283, 512]
+class: person, score: 0.5267177820205688
+box coordinate left,top,right,down: [78, 326, 125, 516]
+class: handbag, score: 0.41975846886634827
+box coordinate left,top,right,down: [261, 339, 281, 413]
+class: handbag, score: 0.27218714356422424
+box coordinate left,top,right,down: [253, 342, 264, 380]
+class: bus , score: 0.9086127877235413
+box coordinate left,top,right,down: [88, 135, 552, 442]
+Detection result save to ./result/bus.jpg
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_resnet.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_resnet.mdx
new file mode 100644
index 000000000..2c38110cb
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_resnet.mdx
@@ -0,0 +1,160 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/resnet/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+:::tip
+After running the conversion script, you can see the model output.
+:::
+
+
+
+```bash
+cd ../python/
+python resnet.py ../model/resnet50-v2-7.onnx ${TARGET_PLATFORM}
+```
+
+
+
+Model output:
+
+```bash
+-----TOP 5-----
+[155] score=0.83 class="n02086240 Shih-Tzu"
+[154] score=0.14 class="n02086079 Pekinese, Pekingese, Peke"
+[262] score=0.02 class="n02112706 Brabancon griffon"
+[283] score=0.00 class="n02123394 Persian cat"
+[152] score=0.00 class="n02085782 Japanese spaniel"
+```
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d resnet
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_resnet_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_resnet_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_resnet_demo ./model/resnet50-v2-7.rknn ./model/dog_224x224.jpg
+```
+
+
+
+```bash
+$ ./rknn_resnet_demo ./model/resnet50-v2-7.rknn ./model/dog_224x224.jpg
+num_lines=1001
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=data, n_dims=4, dims=[1, 224, 224, 3], n_elems=150528, size=150528, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
+output tensors:
+ index=0, name=resnetv24_dense0_fwd, n_dims=2, dims=[1, 1000, 0, 0], n_elems=1000, size=1000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-62, scale=0.146746
+model is NHWC input fmt
+model input height=224, width=224, channel=3
+origin size=224x224 crop size=224x224
+input image: 224 x 224, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+rga_api version 1.10.1_[0]
+rknn_run
+[155] score=0.792182 class=n02086240 Shih-Tzu
+[154] score=0.182606 class=n02086079 Pekinese, Pekingese, Peke
+[262] score=0.013012 class=n02112706 Brabancon griffon
+[152] score=0.002237 class=n02085782 Japanese spaniel
+[283] score=0.001931 class=n02123394 Persian cat
+```
+
+This output indicates the model classifies this image as a **Shih-Tzu**.
+
+### Test Image
+
+
+
+## Python API
+
+This example does not provide a standalone Python API script; you can implement one yourself.
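+
+If needed, the sketch below is one possible minimal script, analogous to the MobileNet example, based on `rknn-toolkit-lite2` (`RKNNLite`); the file paths and the softmax step are illustrative assumptions.
+
+```python
+# Minimal sketch, assuming rknn-toolkit-lite2 is installed on the device and the
+# converted resnet50-v2-7.rknn plus a test image exist at the (hypothetical) paths below.
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+
+rknn = RKNNLite()
+rknn.load_rknn("./model/resnet50-v2-7.rknn")  # hypothetical path
+rknn.init_runtime()
+
+# Prepare a 224x224 RGB image as an NHWC batch of 1.
+img = cv2.cvtColor(cv2.imread("./model/dog_224x224.jpg"), cv2.COLOR_BGR2RGB)
+img = cv2.resize(img, (224, 224))
+logits = rknn.inference(inputs=[np.expand_dims(img, 0)])[0].flatten()
+
+# Softmax over the 1000 ImageNet classes, then print the top-5 indices.
+probs = np.exp(logits - logits.max())
+probs /= probs.sum()
+for idx in probs.argsort()[-5:][::-1]:
+    print(f"[{idx}] score={probs[idx]:.6f}")
+
+rknn.release()
+```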
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_retinaface.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_retinaface.mdx
new file mode 100644
index 000000000..95a002bb6
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_retinaface.mdx
@@ -0,0 +1,207 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/RetinaFace/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/RetinaFace_mobile320.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d RetinaFace
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_RetinaFace_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Add the bundled runtime libraries to `LD_LIBRARY_PATH`.
+
+
+
+```bash
+cd rknn_RetinaFace_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_retinaface_demo ./model/RetinaFace.rknn ./model/test.jpg
+```
+
+
+
+```bash
+$ ./rknn_retinaface_demo ./model/RetinaFace.rknn ./model/test.jpg
+model input num: 1, output num: 3
+input tensors:
+ index=0, name=input0, n_dims=4, dims=[1, 320, 320, 3], n_elems=307200, size=307200, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=1.074510
+output tensors:
+ index=0, name=output0, n_dims=3, dims=[1, 4200, 4, 0], n_elems=16800, size=16800, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=0, scale=0.044699
+ index=1, name=572, n_dims=3, dims=[1, 4200, 2, 0], n_elems=8400, size=16800, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=2, name=571, n_dims=3, dims=[1, 4200, 10, 0], n_elems=42000, size=42000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-22, scale=0.086195
+model is NHWC input fmt
+model input height=320, width=320, channel=3
+origin size=640x427 crop size=640x416
+input image: 640 x 427, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+scale=0.500000 dst_box=(0 54 319 265) allow_slight_change=1 _left_offset=0 _top_offset=54 padding_w=0 padding_h=108
+rga_api version 1.10.1_[0]
+fill dst image (x y w h)=(0 0 320 320) with color=0x72727272
+ RgaCollorFill(1819) RGA_COLORFILL fail: Invalid argument
+ RgaCollorFill(1820) RGA_COLORFILL fail: Invalid argument
+161 im2d_rga_impl rga_task_submit(2171): Failed to call RockChipRga interface, please use 'dmesg' command to view driver error log.
+161 im2d_rga_impl rga_dump_channel_info(1500): src_channel:
+ rect[x,y,w,h] = [0, 0, 0, 0]
+ image[w,h,ws,hs,f] = [0, 0, 0, 0, rgba8888]
+ buffer[handle,fd,va,pa] = [0, 0, 0, 0]
+ color_space = 0x0, global_alpha = 0x0, rd_mode = 0x0
+
+161 im2d_rga_impl rga_dump_channel_info(1500): dst_channel:
+ rect[x,y,w,h] = [0, 0, 320, 320]
+ image[w,h,ws,hs,f] = [320, 320, 320, 320, rgb888]
+ buffer[handle,fd,va,pa] = [2, 0, 0, 0]
+ color_space = 0x0, global_alpha = 0xff, rd_mode = 0x1
+
+161 im2d_rga_impl rga_dump_opt(1550): opt version[0x0]:
+
+161 im2d_rga_impl rga_dump_opt(1551): set_core[0x0], priority[0]
+
+161 im2d_rga_impl rga_dump_opt(1554): color[0x72727272]
+161 im2d_rga_impl rga_dump_opt(1563):
+
+161 im2d_rga_impl rga_task_submit(2180): acquir_fence[-1], release_fence_ptr[0x0], usage[0x280000]
+
+rknn_run
+face @(302 72 476 296) score=0.999023
+write_image path: result.jpg width=640 height=427 channel=3 data=0x6dfc610
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python RetinaFace.py --model_path ../model/RetinaFace.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python RetinaFace.py --model_path ../model/RetinaFace.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+--> Running model
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+image_size: (320, 320) num_priors= 4200
+face @ (302 74 478 300) 0.999023
+save image in ./result.jpg
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_rknn_model_zoo.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_rknn_model_zoo.mdx
new file mode 100644
index 000000000..3e64867b1
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_rknn_model_zoo.mdx
@@ -0,0 +1,138 @@
+RKNN Model Zoo is developed on top of the RKNPU SDK toolchain and provides deployment examples for today's mainstream algorithms, covering how to export RKNN models and how to run RKNN model inference with the Python API and C API.
+
+RKNN Model Zoo relies on RKNN-Toolkit2 for model conversion, and building the C API demos requires the corresponding cross-compilation toolchain.
+
+## Download the Repository
+
+
+
+```bash
+mkdir RKSDK && cd RKSDK
+git clone -b v2.3.2 https://github.com/airockchip/rknn_model_zoo.git
+```
+
+
+
+## Repository Structure
+
+```bash
+./
+├── 3rdparty
+├── asset
+├── build
+├── build-android.sh
+├── build-linux.sh
+├── datasets
+├── docs
+├── examples # 示例目录
+│ ├── clip
+│ ├── deeplabv3
+│ ├── lite_transformer
+│ ├── LPRNet
+│ ├── mms_tts
+│ ├── mobilenet
+│ ├── mobilesam
+│ ├── PPOCR
+│ ├── ppseg
+│ ├── ppyoloe
+│ ├── resnet
+│ ├── RetinaFace
+│ ├── wav2vec2
+│ ├── whisper
+│ ├── yamnet
+│ ├── yolo11
+│ ├── yolov10
+│ ├── yolov5
+│ ├── yolov5_seg
+│ ├── yolov6
+│ ├── yolov7
+│ ├── yolov8
+│ ├── yolov8_obb
+│ ├── yolov8_pose
+│ ├── yolov8_seg
+│ ├── yolo_world
+│ ├── yolox
+│ └── zipformer
+├── FAQ_CN.md
+├── FAQ.md
+├── install
+├── LICENSE
+├── py_utils
+│ ├── coco_utils.py
+│ ├── __init__.py
+│ ├── onnx_executor.py
+│ ├── pytorch_executor.py
+│ └── rknn_executor.py
+├── README_CN.md
+├── README.md
+├── scaling_frequency.sh
+└── utils
+ ├── audio_utils.c
+ ├── audio_utils.h
+ ├── CMakeLists.txt
+ ├── common.h
+ ├── file_utils.c
+ ├── file_utils.h
+ ├── font.h
+ ├── image_drawing.c
+ ├── image_drawing.h
+ ├── image_utils.c
+ └── image_utils.h
+```
+
+## Basic Usage
+
+### C API
+
+Build with the build-linux.sh script in the repository root.
+
+To build executables on an x86_64 host that run on arm64 devices, you need a cross-compilation toolchain.
+
+Download: [Cross-compilation toolchain](https://developer.arm.com/-/media/files/downloads/gnu/11.2-2022.02/binrel/gcc-arm-11.2-2022.02-x86_64-aarch64-none-linux-gnu.tar.xz?rev=33c6e30e5ac64e6dba8f0431f2c35f1b&revision=33c6e30e-5ac6-4e6d-ba8f-0431f2c35f1b&hash=632C6C0BD43C3E4B59CA8A09A7055D30).
+
+Extract it after downloading.
+
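+For reference, a minimal download-and-extract sketch (the archive name is taken from the link above; any working directory is fine):
+
+```bash
+wget -O gcc-arm-11.2-2022.02-x86_64-aarch64-none-linux-gnu.tar.xz "https://developer.arm.com/-/media/files/downloads/gnu/11.2-2022.02/binrel/gcc-arm-11.2-2022.02-x86_64-aarch64-none-linux-gnu.tar.xz?rev=33c6e30e5ac64e6dba8f0431f2c35f1b&revision=33c6e30e-5ac6-4e6d-ba8f-0431f2c35f1b&hash=632C6C0BD43C3E4B59CA8A09A7055D30"
+tar -xf gcc-arm-11.2-2022.02-x86_64-aarch64-none-linux-gnu.tar.xz
+```
+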
+Before using the script, export the compiler path as an environment variable so the script can find the downloaded cross compiler.
+
+
+
+```bash
+export GCC_COMPILER=/path/to/your/gcc/bin/aarch64-linux-gnu
+```
+
+
+
+Basic usage of the script:
+
+
+
+```bash
+./build-linux.sh -t <target> -a <arch> -d <build_demo_name> [-b <build_type>] [-m]
+    -t : target (rk356x/rk3588)
+    -a : arch (aarch64)
+    -d : demo name
+    -b : build_type (Debug/Release)
+    -m : enable address sanitizer, build_type needs to be set to Debug
+Note: 'rk356x' represents rk3562/rk3566/rk3568.
+
+# Example: build the yolov5 demo for RK3566
+./build-linux.sh -t rk356x -a aarch64 -d yolov5
+```
+
+
+
+### Python API
+
+Activate the virtual environment, convert the model to the RKNN format, then go to the target example directory and run the corresponding Python script.
+
+Take the yolov5 demo for RK3566 as an example:
+
+
+
+```bash
+conda activate rknn
+cd examples/yolov5/python/
+python3 yolov5.py --target rk356x --img_show
+```
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_wav2vec2.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_wav2vec2.mdx
new file mode 100644
index 000000000..6e7570474
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_wav2vec2.mdx
@@ -0,0 +1,172 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/wav2vec2/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/wav2vec2_base_960h_20s.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d wav2vec2
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_wav2vec2_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Export the runtime libraries to the environment variable.
+
+
+
+```bash
+cd rknn_wav2vec2_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_wav2vec2_demo ./model/wav2vec2_base_960h_20s.rknn ./model/test.wav
+```
+
+
+
+```bash
+$ ./rknn_wav2vec2_demo ./model/wav2vec2_base_960h_20s.rknn ./model/test.wav
+-- read_audio & convert_channels & resample_audio use: 0.616000 ms
+-- audio_preprocess use: 0.464000 ms
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input, n_dims=2, dims=[1, 320000], n_elems=320000, size=640000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=output, n_dims=3, dims=[1, 999, 32], n_elems=31968, size=63936, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+-- init_wav2vec2_model use: 705.586975 ms
+-- inference_wav2vec2_model use: 3297.358887 ms
+
+Wav2vec2 output: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL
+
+Real Time Factor (RTF): 3.297 / 20.000 = 0.165
+```
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python wav2vec2.py --model_path ../model/wav2vec2_base_960h_20s.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python wav2vec2.py --model_path ../model/wav2vec2_base_960h_20s.rknn --target rk3588
+2026-01-16 09:12:33.885150713 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Loading model
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+W inference: Inputs should be placed in a list, like [img1, img2], both the img1 and img2 are ndarray.
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+Wav2vec2 output: MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_whisper.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_whisper.mdx
new file mode 100644
index 000000000..996d4189f
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_whisper.mdx
@@ -0,0 +1,262 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/whisper/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/whisper_encoder_base_20s.onnx ${TARGET_PLATFORM}
+python convert.py ../model/whisper_decoder_base_20s.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d whisper
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_whisper_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Export the runtime libraries to the environment variable.
+
+
+
+```bash
+cd rknn_whisper_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+# Chinese audio
+./rknn_whisper_demo ./model/whisper_encoder_base_20s.rknn ./model/whisper_decoder_base_20s.rknn zh ./model/test_zh.wav
+# English audio
+./rknn_whisper_demo ./model/whisper_encoder_base_20s.rknn ./model/whisper_decoder_base_20s.rknn en ./model/test_en.wav
+```
+
+
+
+Chinese audio:
+
+```bash
+$ ./rknn_whisper_demo ./model/whisper_encoder_base_20s.rknn ./model/whisper_decoder_base_20s.rknn zh ./model/test_zh.wav
+-- read_audio & convert_channels & resample_audio use: 6.659000 ms
+-- read_mel_filters & read_vocab use: 54.120998 ms
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=x, n_dims=3, dims=[1, 80, 2000], n_elems=160000, size=320000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=out, n_dims=3, dims=[1, 1000, 512], n_elems=512000, size=1024000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+-- init_whisper_encoder_model use: 199.550995 ms
+model input num: 2, output num: 1
+input tensors:
+ index=0, name=tokens, n_dims=2, dims=[1, 12], n_elems=12, size=96, fmt=UNDEFINED, type=INT64, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=1, name=audio, n_dims=3, dims=[1, 1000, 512], n_elems=512000, size=1024000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=out, n_dims=3, dims=[1, 12, 51865], n_elems=622380, size=1244760, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+-- init_whisper_decoder_model use: 282.627014 ms
+-- inference_whisper_model use: 1656.614014 ms
+
+Whisper output: 对我做了介绍,我想说的是大家如果对我的研究感兴趣
+
+Real Time Factor (RTF): 1.657 / 5.611 = 0.295
+```
+
+English audio:
+
+```bash
+$ ./rknn_whisper_demo ./model/whisper_encoder_base_20s.rknn ./model/whisper_decoder_base_20s.rknn en ./model/test_en.wav
+-- read_audio & convert_channels & resample_audio use: 2.198000 ms
+-- read_mel_filters & read_vocab use: 60.438000 ms
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=x, n_dims=3, dims=[1, 80, 2000], n_elems=160000, size=320000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=out, n_dims=3, dims=[1, 1000, 512], n_elems=512000, size=1024000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+-- init_whisper_encoder_model use: 121.598999 ms
+model input num: 2, output num: 1
+input tensors:
+ index=0, name=tokens, n_dims=2, dims=[1, 12], n_elems=12, size=96, fmt=UNDEFINED, type=INT64, qnt_type=AFFINE, zp=0, scale=1.000000
+ index=1, name=audio, n_dims=3, dims=[1, 1000, 512], n_elems=512000, size=1024000, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=out, n_dims=3, dims=[1, 12, 51865], n_elems=622380, size=1244760, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+-- init_whisper_decoder_model use: 222.567993 ms
+-- inference_whisper_model use: 1372.854980 ms
+
+Whisper output: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.
+
+Real Time Factor (RTF): 1.373 / 5.855 = 0.234
+```
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+:::info
+**Dependencies**: Run the following command to install the required package.
+
+```bash
+pip install soundfile
+```
+
+:::
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+# Chinese audio
+python whisper.py --encoder_model_path ../model/whisper_encoder_base_20s.rknn --decoder_model_path ../model/whisper_decoder_base_20s.rknn --task zh --audio_path ../model/test_zh.wav --target ${TARGET_PLATFORM}
+# English audio
+python whisper.py --encoder_model_path ../model/whisper_encoder_base_20s.rknn --decoder_model_path ../model/whisper_decoder_base_20s.rknn --task en --audio_path ../model/test_en.wav --target ${TARGET_PLATFORM}
+```
+
+
+
+Chinese audio:
+
+```bash
+$ python whisper.py --encoder_model_path ../model/whisper_encoder_base_20s.rknn --decoder_model_path ../model/whisper_decoder_base_20s.rknn --task zh --audio_path ../model/test_zh.wav --target rk3588
+2026-01-16 08:54:55.503119681 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Loading model
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+I rknn-toolkit2 version: 2.3.2
+--> Loading model
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+W inference: Inputs should be placed in a list, like [img1, img2], both the img1 and img2 are ndarray.
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+Whisper output: 对我做了介绍,我想说的是,如果对我的研究感兴趣
+```
+
+English audio:
+
+```bash
+$ python whisper.py --encoder_model_path ../model/whisper_encoder_base_20s.rknn --decoder_model_path ../model/whisper_decoder_base_20s.rknn --task en --audio_path ../model/test_en.wav --target rk3588
+2026-01-16 08:54:35.451693658 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card1/device/vendor"
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Loading model
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+I rknn-toolkit2 version: 2.3.2
+--> Loading model
+done
+--> Init runtime environment
+I target set by user is: rk3588
+done
+W inference: Inputs should be placed in a list, like [img1, img2], both the img1 and img2 are ndarray.
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+Whisper output: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolo_world.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolo_world.mdx
new file mode 100644
index 000000000..1c426539c
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolo_world.mdx
@@ -0,0 +1,288 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/yolo_world/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/clip_text.onnx ${TARGET_PLATFORM}
+python convert.py ../model/yolo_world_v2s.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d yolo_world
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_yolo_world_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Export the runtime libraries to the environment variable.
+
+
+
+```bash
+cd rknn_yolo_world_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_yolo_world_demo ./model/clip_text.rknn ./model/detect_classes.txt ./model/yolo_world_v2s.rknn ./model/bus.jpg
+```
+
+
+
+```bash
+$ ./rknn_yolo_world_demo ./model/clip_text.rknn ./model/detect_classes.txt ./model/yolo_world_v2s.rknn ./model/bus.jpg
+--> init clip text model
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=input_ids, n_dims=2, dims=[1, 20], n_elems=20, size=160, fmt=UNDEFINED, type=INT64, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=text_embeds, n_dims=2, dims=[1, 512], n_elems=512, size=1024, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+load label ./model/detect_classes.txt
+--> init yolo world model
+model input num: 2, output num: 6
+input tensors:
+ index=0, name=images, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+ index=1, name=texts, n_dims=3, dims=[1, 80, 512], n_elems=40960, size=40960, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-52, scale=0.003410
+output tensors:
+ index=0, name=1168, n_dims=4, dims=[1, 80, 80, 80], n_elems=512000, size=512000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003214
+ index=1, name=1076, n_dims=4, dims=[1, 4, 80, 80], n_elems=25600, size=25600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.054310
+ index=2, name=1170, n_dims=4, dims=[1, 80, 40, 40], n_elems=128000, size=128000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003697
+ index=3, name=1121, n_dims=4, dims=[1, 4, 40, 40], n_elems=6400, size=6400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.057563
+ index=4, name=1172, n_dims=4, dims=[1, 80, 20, 20], n_elems=32000, size=32000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003884
+ index=5, name=1166, n_dims=4, dims=[1, 4, 20, 20], n_elems=1600, size=1600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.058563
+model is NHWC input fmt
+model input height=640, width=640, channel=3
+num_lines=80
+origin size=640x640 crop size=640x640
+input image: 640 x 640, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+--> inference clip text model
+rknn_run_1
+rknn_run_2
+rknn_run_3
+rknn_run_4
+rknn_run_5
+rknn_run_6
+rknn_run_7
+rknn_run_8
+rknn_run_9
+rknn_run_10
+rknn_run_11
+rknn_run_12
+rknn_run_13
+rknn_run_14
+rknn_run_15
+rknn_run_16
+rknn_run_17
+rknn_run_18
+rknn_run_19
+rknn_run_20
+rknn_run_21
+rknn_run_22
+rknn_run_23
+rknn_run_24
+rknn_run_25
+rknn_run_26
+rknn_run_27
+rknn_run_28
+rknn_run_29
+rknn_run_30
+rknn_run_31
+rknn_run_32
+rknn_run_33
+rknn_run_34
+rknn_run_35
+rknn_run_36
+rknn_run_37
+rknn_run_38
+rknn_run_39
+rknn_run_40
+rknn_run_41
+rknn_run_42
+rknn_run_43
+rknn_run_44
+rknn_run_45
+rknn_run_46
+rknn_run_47
+rknn_run_48
+rknn_run_49
+rknn_run_50
+rknn_run_51
+rknn_run_52
+rknn_run_53
+rknn_run_54
+rknn_run_55
+rknn_run_56
+rknn_run_57
+rknn_run_58
+rknn_run_59
+rknn_run_60
+rknn_run_61
+rknn_run_62
+rknn_run_63
+rknn_run_64
+rknn_run_65
+rknn_run_66
+rknn_run_67
+rknn_run_68
+rknn_run_69
+rknn_run_70
+rknn_run_71
+rknn_run_72
+rknn_run_73
+rknn_run_74
+rknn_run_75
+rknn_run_76
+rknn_run_77
+rknn_run_78
+rknn_run_79
+rknn_run_80
+--> inference yolo world model
+scale=1.000000 dst_box=(0 0 639 639) allow_slight_change=1 _left_offset=0 _top_offset=0 padding_w=0 padding_h=0
+rga_api version 1.10.1_[0]
+rknn_run
+person @ (475 234 559 519) 0.948
+person @ (110 237 226 535) 0.948
+bus @ (96 135 551 436) 0.932
+person @ (212 240 283 510) 0.917
+person @ (80 326 125 514) 0.665
+write_image path: out.png width=640 height=640 channel=3 data=0xffff8189b010
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python yolo_world.py --text_model ../model/clip_text.rknn --yolo_world ../model/yolo_world_v2s.rknn --target ${TARGET_PLATFORM}
+```
+
+
+
+```bash
+$ python yolo_world.py --text_model ../model/clip_text.rknn --yolo_world ../model/yolo_world_v2s.rknn --target rk3588
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+I rknn-toolkit2 version: 2.3.2
+I target set by user is: rk3588
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+ class score xmin, ymin, xmax, ymax
+--------------------------------------------------
+ person 0.948 [ 477, 232, 559, 521]
+ person 0.932 [ 110, 236, 226, 536]
+ person 0.917 [ 212, 240, 283, 510]
+ person 0.595 [ 80, 327, 126, 514]
+ bus 0.917 [ 98, 135, 553, 435]
+Save results to result.jpg!
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8.mdx
new file mode 100644
index 000000000..3122360e4
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8.mdx
@@ -0,0 +1,198 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/yolov8/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/yolov8n.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d yolov8
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_yolov8_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Export the runtime libraries to the environment variable.
+
+
+
+```bash
+cd rknn_yolov8_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_yolov8_demo model/yolov8.rknn model/bus.jpg
+```
+
+
+
+```bash
+$ ./rknn_yolov8_demo model/yolov8.rknn model/bus.jpg
+load label ./model/coco_80_labels_list.txt
+model input num: 1, output num: 9
+input tensors:
+ index=0, name=images, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+output tensors:
+ index=0, name=318, n_dims=4, dims=[1, 64, 80, 80], n_elems=409600, size=409600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-58, scale=0.117659
+ index=1, name=onnx::ReduceSum_326, n_dims=4, dims=[1, 80, 80, 80], n_elems=512000, size=512000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003104
+ index=2, name=331, n_dims=4, dims=[1, 1, 80, 80], n_elems=6400, size=6400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003173
+ index=3, name=338, n_dims=4, dims=[1, 64, 40, 40], n_elems=102400, size=102400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-45, scale=0.093747
+ index=4, name=onnx::ReduceSum_346, n_dims=4, dims=[1, 80, 40, 40], n_elems=128000, size=128000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003594
+ index=5, name=350, n_dims=4, dims=[1, 1, 40, 40], n_elems=1600, size=1600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003627
+ index=6, name=357, n_dims=4, dims=[1, 64, 20, 20], n_elems=25600, size=25600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-34, scale=0.083036
+ index=7, name=onnx::ReduceSum_365, n_dims=4, dims=[1, 80, 20, 20], n_elems=32000, size=32000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003874
+ index=8, name=369, n_dims=4, dims=[1, 1, 20, 20], n_elems=400, size=400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+model is NHWC input fmt
+model input height=640, width=640, channel=3
+origin size=640x640 crop size=640x640
+input image: 640 x 640, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+scale=1.000000 dst_box=(0 0 639 639) allow_slight_change=1 _left_offset=0 _top_offset=0 padding_w=0 padding_h=0
+rga_api version 1.10.1_[0]
+rknn_run
+person @ (211 241 282 506) 0.864
+bus @ (96 136 549 449) 0.864
+person @ (109 235 225 535) 0.860
+person @ (477 226 560 522) 0.848
+person @ (79 327 116 513) 0.306
+write_image path: out.png width=640 height=640 channel=3 data=0x32a58800
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python yolov8.py --model_path ./model/yolov8.rknn --target ${TARGET_PLATFORM} --img_folder ./model --img_save
+```
+
+
+
+```bash
+$ python yolov8.py --model_path ./model/yolov8.rknn --target rk3588 --img_folder ./model --img_save
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Init runtime environment
+I target set by user is: rk3588
+done
+Model-./model/yolov8.rknn is rknn model, starting val
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+
+IMG: bus.jpg
+person @ (211 241 282 506) 0.864
+person @ (109 235 225 535) 0.860
+person @ (477 226 560 522) 0.848
+person @ (79 327 116 513) 0.306
+bus @ (96 136 549 449) 0.864
+Detection result save to ./result/bus.jpg
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8_seg.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8_seg.mdx
new file mode 100644
index 000000000..ec58b2695
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/ai/rockchip/_yolov8_seg.mdx
@@ -0,0 +1,209 @@
+## Environment Setup
+
+:::info
+Follow [RKNN Installation](./rknn-install) to set up the environment.
+
+Follow [RKNN Model Zoo](./rknn-model-zoo) to download the example files.
+:::
+
+## Model Download
+
+Download the ONNX model file.
+
+
+
+```bash
+cd rknn_model_zoo/examples/yolov8_seg/model/
+bash download_model.sh
+```
+
+
+
+## Model Conversion
+
+Select the target platform.
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk3588
+```
+
+
+
+
+
+
+
+
+
+```bash
+export TARGET_PLATFORM=rk356x
+```
+
+
+
+
+
+
+
+Convert the ONNX model to an RKNN model.
+
+
+
+```bash
+cd ../python/
+python convert.py ../model/yolov8s-seg.onnx ${TARGET_PLATFORM}
+```
+
+
+
+## C API
+
+### Build the Example
+
+Go to the rknn_model_zoo directory and run build-linux.sh to build.
+
+
+
+```bash
+cd ../../..
+bash build-linux.sh -t ${TARGET_PLATFORM} -a aarch64 -d yolov8_seg
+```
+
+
+
+### Sync Files to the Device
+
+Copy the built demo directory under the install folder to the device.
+
+
+
+```bash
+cd install/${TARGET_PLATFORM}_linux_aarch64/
+scp -r rknn_yolov8_seg_demo/ user@your_device_ip:target_directory
+```
+
+
+
+### Run the Example
+
+Export the runtime libraries to the environment variable.
+
+
+
+```bash
+cd rknn_yolov8_seg_demo/
+export LD_LIBRARY_PATH=./lib
+```
+
+
+
+Run the example.
+
+
+
+```bash
+./rknn_yolov8_seg_demo ./model/yolov8_seg.rknn ./model/bus.jpg
+```
+
+
+
+```bash
+$ ./rknn_yolov8_seg_demo ./model/yolov8_seg.rknn ./model/bus.jpg
+[RKNN] Can not find libdrm.so
+load label ./model/coco_80_labels_list.txt
+model input num: 1, output num: 13
+input tensors:
+ index=0, name=images, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+output tensors:
+ index=0, name=375, n_dims=4, dims=[1, 64, 80, 80], n_elems=409600, size=409600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-61, scale=0.115401
+ index=1, name=onnx::ReduceSum_383, n_dims=4, dims=[1, 80, 80, 80], n_elems=512000, size=512000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003514
+ index=2, name=388, n_dims=4, dims=[1, 1, 80, 80], n_elems=6400, size=6400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003540
+ index=3, name=354, n_dims=4, dims=[1, 32, 80, 80], n_elems=204800, size=204800, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=27, scale=0.019863
+ index=4, name=395, n_dims=4, dims=[1, 64, 40, 40], n_elems=102400, size=102400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-15, scale=0.099555
+ index=5, name=onnx::ReduceSum_403, n_dims=4, dims=[1, 80, 40, 40], n_elems=128000, size=128000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003555
+ index=6, name=407, n_dims=4, dims=[1, 1, 40, 40], n_elems=1600, size=1600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003680
+ index=7, name=361, n_dims=4, dims=[1, 32, 40, 40], n_elems=51200, size=51200, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=30, scale=0.022367
+ index=8, name=414, n_dims=4, dims=[1, 64, 20, 20], n_elems=25600, size=25600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-55, scale=0.074253
+ index=9, name=onnx::ReduceSum_422, n_dims=4, dims=[1, 80, 20, 20], n_elems=32000, size=32000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003813
+ index=10, name=426, n_dims=4, dims=[1, 1, 20, 20], n_elems=400, size=400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
+ index=11, name=368, n_dims=4, dims=[1, 32, 20, 20], n_elems=12800, size=12800, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=43, scale=0.019919
+ index=12, name=347, n_dims=4, dims=[1, 32, 160, 160], n_elems=819200, size=819200, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-119, scale=0.032336
+model is NHWC input fmt
+model input height=640, width=640, channel=3
+origin size=640x640 crop size=640x640
+input image: 640 x 640, subsampling: 4:2:0, colorspace: YCbCr, orientation: 1
+scale=1.000000 dst_box=(0 0 639 639) allow_slight_change=1 _left_offset=0 _top_offset=0 padding_w=0 padding_h=0
+rga_api version 1.10.1_[0]
+rknn_run
+-- matmul_by_cpu_uint8 use: 13.651000 ms
+-- resize_by_opencv_uint8 use: 3.066000 ms
+-- crop_mask_uint8 use: 4.863000 ms
+-- seg_reverse use: 0.303000 ms
+bus @ (87 137 553 439) 0.911
+person @ (109 236 226 534) 0.900
+person @ (211 241 283 508) 0.869
+person @ (476 234 559 519) 0.866
+person @ (79 327 125 514) 0.540
+tie @ (248 284 259 310) 0.274
+write_image path: out.png width=640 height=640 channel=3 data=0xaaab07e88330
+```
+
+### Results
+
+
+
+## Python API
+
+### Activate the virtual environment
+
+
+
+```bash
+conda activate rknn
+```
+
+
+
+### Run the Example
+
+Copy the related files to the device and run the following commands.
+
+
+
+```bash
+python yolov8_seg.py --model_path ../model/yolov8_seg.rknn --target ${TARGET_PLATFORM} --img_save
+```
+
+
+
+```bash
+$ python yolov8_seg.py --model_path ../model/yolov8_seg.rknn --target rk3588 --img_save
+/home/radxa/miniforge3/envs/rknn/lib/python3.12/site-packages/rknn/api/rknn.py:51: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ self.rknn_base = RKNNBase(cur_path, verbose)
+I rknn-toolkit2 version: 2.3.2
+--> Init runtime environment
+I target set by user is: rk3588
+done
+Model-../model/yolov8_seg.rknn is rknn model, starting val
+W inference: The 'data_format' is not set, and its default value is 'nhwc'!
+
+
+IMG: bus.jpg
+bus @ (87 137 553 439) 0.911
+person @ (108 236 227 537) 0.900
+person @ (211 241 283 508) 0.869
+person @ (477 232 559 519) 0.866
+person @ (79 327 125 514) 0.540
+tie @ (248 284 259 310) 0.274
+The segmentation results have been saved to ./result/bus.jpg
+```
+
+### Results
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-deepseek-r1.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-deepseek-r1.mdx
index da776a653..1fa9ddc40 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-deepseek-r1.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-deepseek-r1.mdx
@@ -1,193 +1,160 @@
-[DeepSeek-R1](https://api-docs.deepseek.com/news/news250120) is developed by Hangzhou [DeepSeek](https://www.deepseek.com/), a company focused on AI research.
-This model fully open-sources all training techniques and model weights, with performance aligned to the closed-source OpenAI-o1.
-DeepSeek has distilled six smaller models from DeepSeek-R1 for the open-source community, including Qwen2.5 and Llama3.1.
-
-This document will guide you through deploying the DeepSeek-R1 distilled model [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) using RKLLM onto the RK3588 platform for hardware-accelerated inference using NPU.
+[DeepSeek-R1](https://api-docs.deepseek.com/news/news250120) is a state-of-the-art reasoning model developed by DeepSeek.
+DeepSeek has open-sourced the training approach and model weights, and its performance is competitive with closed-source reasoning models.
+DeepSeek also released multiple lightweight open-source variants distilled from DeepSeek-R1, covering the Qwen2.5 and Llama3.1 families.
+This document demonstrates how to deploy the distilled **DeepSeek-R1-Distill-Qwen-1.5B** model to an RK3588 device with the RKLLM toolchain and run hardware-accelerated inference on the built-in NPU.

-## Model File Download
-
-:::tip
-Radxa has already provided precompiled rkllm models and executables. Users can directly download and use them. If you want to reference the compilation process, continue with the optional section below.
-:::
-
-- Use [git LFS](https://git-lfs.com/) to download the precompiled rkllm model from [ModelScope](https://modelscope.cn/models/radxa/DeepSeek-R1-Distill-Qwen-1.5B_RKLLM)
-
-
-
- ```bash
- git lfs install
- git clone https://www.modelscope.cn/radxa/DeepSeek-R1-Distill-Qwen-1.5B_RKLLM.git
- ```
-
-
-
-### (Optional) Model Compilation
-
-:::tip
-Please complete the RKLLM setup on both PC and development board according to [RKLLM Installation](./rkllm_install)
-:::
-:::tip
-For RK358X users, please specify the `rk3588` platform as TARGET_PLATFORM
-:::
-
-- On your x86 PC workstation, download the [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model weights. If you haven't installed [git-lfs](https://git-lfs.com/), please install it first.
-
-
+## Quick Start
- ```bash
- git lfs install
- git clone https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- ```
+### Download the demo
-
+Download the complete demo from ModelScope.
-- Activate the rkllm conda environment. You can refer to [RKLLM conda Installation](rkllm_install#x86-pc-workstation)
+
-
+```bash
+pip install -U modelscope
+modelscope download --model radxa/DeepSeek-R1-Distill-Qwen-1.5B_RKLLM
+```
- ```bash
- conda activate rkllm
- ```
+
-
+### Run the Example
-- Generate the quantization calibration file for the LLM model
+
-
+```bash
+cd demo_Linux_aarch64/
+export LD_LIBRARY_PATH=./lib
+./llm_demo ../DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm 2048 4096
+```
- ```bash
- cd examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export
- python3 generate_data_quant.py -m /path/to/DeepSeek-R1-Distill-Qwen-1.5B
- ```
+
-
+## Full Conversion Workflow
- | Parameter | Required | Description | Options |
- | --------- | -------- | -------------------------------- | ------- |
- | `path` | Required | Path to Huggingface model folder | N |
-
- `generate_data_quant.py` will generate a quantization file named `data_quant.json`.
-
-- Modify the model path in `rknn-llm/xamples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py`
-
-
-
- ```python
- 11 modelpath = '/path/to/DeepSeek-R1-Distill-Qwen-1.5B_Demo'
- ```
-
-
-
-- Adjust the maximum context length (`max_context`)
-
- If you have specific requirements for `max_context`, modify the value of the `max_context` parameter in the `llm.build` function within `rknn-llm/xamples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py`. The default is 4096. A higher value increases memory usage. Do not exceed 16,384, and ensure the value is a multiple of 32 (e.g., 32, 64, 96, ..., 16,384).
-
-- Run the model conversion script
-
-
+:::info[Prerequisites]
+Set up the development environment by following [RKLLM Installation](./rkllm-install).
+:::
- ```bash
- python3 export_rkllm.py
- ```
+:::warning[Version note]
+Running this example with RKLLM **1.2.3** may cause severe quality degradation (repetitive output).
+It is recommended to use RKLLM **1.2.2** for this demo. See: [GitHub Issue](https://github.com/airockchip/rknn-llm/issues/424).
+:::
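+
+One way to pin the SDK to 1.2.2 is to clone the matching release branch instead of `release-v1.2.3` (this assumes upstream publishes a `release-v1.2.2` branch following the same naming scheme; verify it exists before relying on it):
+
+```bash
+# Assumed branch name, based on the repository's usual release-vX.Y.Z naming
+git clone -b release-v1.2.2 https://github.com/airockchip/rknn-llm.git
+```
+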
-
+### Activate the virtual environment
- After successful conversion, you will obtain the rkllm model file `./DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm`. From the naming convention, this model is W8A8 quantized and designed for the RK3588 platform.
+
-### (Optional) Build Executable
+```bash
+conda activate rkllm
+pip install -U huggingface_hub
+```
-- Download the cross-compilation toolchain [gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu](https://developer.arm.com/downloads/-/gnu-a/10-2-2020-11)
+
-- Modify the main program code in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/src/llm_demo.cpp`
+### Download the Model
- Comment out line 165. When converting the model, RKLLM automatically parses the `chat_template` field in the tokenizer_config.json file of the Hugging Face model, so no manual changes are required.
+
-
+```bash
+cd RKSDK/rknn-llm/examples/rkllm_api_demo/
+hf download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local-dir ./DeepSeek-R1-Distill-Qwen-1.5B
+```
- ```vim
- 165 // rkllm_set_chat_template(llmHandle, "", "<|User|>", "<|Assistant|>");
- ```
+
-
+### Model Conversion
-- Update the `GCC_COMPILER_PATH` in the build script `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/build-linux.sh`
+Generate a quantization calibration file and export the model to the RKLLM format.
-
+:::tip
+If you need a different `max_context` length, adjust the `max_context` parameter in the `llm.build` call in `export_rkllm.py`.
+The default is **4096**. Larger values use more memory. The value must be ≤ **16384** and a multiple of **32** (e.g., 32, 64, 96, …, 16384).
+:::
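+
+As a rough, illustrative sketch (it assumes the call contains a literal `max_context=4096`; check the script first, since the exact line may differ between RKLLM releases):
+
+```bash
+grep -n "max_context" export_rkllm.py                            # locate the llm.build(...) call
+sed -i 's/max_context=4096/max_context=8192/' export_rkllm.py    # example: raise the limit to 8192
+```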
- ```vim
- 8 GCC_COMPILER_PATH=/path/to/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu
- ```
+
-
+```bash
+cd export/
+python generate_data_quant.py -m ../DeepSeek-R1-Distill-Qwen-1.5B -o ../DeepSeek-R1-Distill-Qwen-1.5B/data_quant.json
+# Before running, update the model path and calibration file path as needed.
+python export_rkllm.py
+```
-- Run the build script
+
-
+### Build the executable
- ```bash
- cd rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/
- bash build-linux.sh
- ```
+
-
+```bash
+cd ../deploy/
+# Export the cross-compiler path.
+export GCC_COMPILER=/path/to/your/gcc/bin/aarch64-linux-gnu
+bash build-linux.sh
+```
- The generated executable will be located at `install/demo_Linux_aarch64`.
+
-## Board-side Deployment
+The generated binaries are located at `install/demo_Linux_aarch64`.
-### Terminal Mode
+### Deploy to the device
-- Copy the converted `DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm` model and the compiled `demo_Linux_aarch64` folder to the development board.
-- Set the environment variable
+Copy the converted model and the built `demo_Linux_aarch64` directory to the device.
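+
+For example, from the PC side (username, IP, target path, and the exported model's location are placeholders, following the same convention as the other examples in this guide):
+
+```bash
+scp -r install/demo_Linux_aarch64/ user@your_device_ip:target_directory
+scp /path/to/DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm user@your_device_ip:target_directory
+```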
-
+
- ```bash
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/demo_Linux_aarch64/lib
- ```
+```bash
+cd demo_Linux_aarch64/
+export RKLLM_LOG_LEVEL=1
+export LD_LIBRARY_PATH=./lib
+./llm_demo ../DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm 2048 4096
+```
-
+
- :::tip
- Users who downloaded from ModelScope can directly export the `librkllmrt.so` from the downloaded repository.
- :::
+Run the demo. Type `exit` to quit.
-- Run the `llm_demo`. Type `exit` to quit.
+
-
+```bash
+./llm_demo ../DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm 2048 4096
+```
- ```bash
- export RKLLM_LOG_LEVEL=1
- ## Usage: ./llm_demo model_path max_new_tokens max_context_len
- ./llm_demo /path/to/DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm 2048 4096
- ```
+
-
+```bash
+$ ./llm_demo ../DeepSeek-R1-Distill-Qwen-1.5B_W8A8_RK3588.rkllm 2048 4096
+rkllm init start
+I rkllm: rkllm-runtime version: 1.2.2, rknpu driver version: 0.9.8, platform: RK3588
+...
+rkllm init success
- | Parameter | Required | Description | Options |
- | ----------------- | -------- | ---------------------------------- | ----------------------------- |
- | `path` | Required | Path to RKLLM model folder | N |
- | `max_new_tokens` | Required | Max number of tokens to generate | Must be ≤ max_context_len |
- | `max_context_len` | Required | Maximum context size for the model | Must be ≤ model's max_context |
+user: Solve x+y=14 and 2x+4y=38.
+assistant: x=9, y=5
+```
- 
+| Parameter | Required | Description | Notes |
+| ----------------- | -------- | ------------------------- | ------------------------------ |
+| `path` | Yes | Path to the RKLLM model | N/A |
+| `max_new_tokens` | Yes | Max generated tokens/turn | Must be ≤ `max_context_len` |
+| `max_context_len` | Yes | Max context length | Must be ≤ export `max_context` |
-## Performance Analysis
+### Performance
-For the math problem: "Solve the equations x + y = 12, 2x + 4y = 34, find the values of x and y",
+For the math prompt: `Solve x+y=12 and 2x+4y=34. Find x and y.`,
-Performance on RK3588 reaches **15.36 tokens/s**:
+RK3588 achieves **15.36 tokens/s**:
| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
| -------- | --------------- | ------ | ------------------- | ----------------- |
| Prefill | 122.70 | 29 | 4.23 | 236.35 |
| Generate | 27539.16 | 423 | 65.10 | 15.36 |
-On RK3582, the performance reaches **10.61 tokens/s**:
-
-| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
-| -------- | --------------- | ------ | ------------------- | ----------------- |
-| Prefill | 599.71 | 81 | 7.4 | 135.07 |
-| Generate | 76866.41 | 851 | 94.25 | 10.61 |
+RK3582 achieves **10.61 tokens/s**:
+| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
+|----------|-----------------|--------|---------------------|-------------------|
+| Prefill | 599.71 | 81 | 7.4 | 135.07 |
+| Generate | 76866.41 | 851 | 94.25 | 10.61 |
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-install.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-install.mdx
index 626f41c7e..d80bf46f4 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-install.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-install.mdx
@@ -1,12 +1,12 @@
-## RKLLM Introduction
+## RKLLM Overview
-RKLLM helps users quickly deploy LLM models onto Rockchip chips. Currently supported chips are: RK3588/RK3576/RK3562 series chips.
+RKLLM helps you deploy LLM models to Rockchip SoCs. Currently supported chips include the RK3588 / RK3576 / RK3562 series.
-The overall framework of RKLLM is as follows:
+RKLLM architecture:

-#### Currently Supported Models
+#### Supported models
- [LLAMA models](https://huggingface.co/meta-llama)
- [TinyLLAMA models](https://huggingface.co/TinyLlama)
@@ -26,129 +26,140 @@ The overall framework of RKLLM is as follows:
- [Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- [Qwen3](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f)
-## RKLLM Installation
+## Download the SDK
-To use RKNPU, users need to first run the RKLLM-Toolkit on an x86 workstation to convert trained models into RKLLM format, and then perform inference on the development board using the RKLLM C API.
+Go to your SDK directory and clone the RKLLM repository.
-### x86 PC Workstation
+
-- (Optional) Install [Anaconda](https://www.anaconda.com/)
+```bash
+cd RKSDK
+git clone -b release-v1.2.3 https://github.com/airockchip/rknn-llm.git
+```
- If Python 3.11 (required version) is not installed in your system or you have multiple Python environments, it is recommended to use [Anaconda](https://www.anaconda.com/) to create a new Python 3.11 environment.
+
- - Install Anaconda
+## Install Miniforge
- Execute the following command in the terminal window of your computer to check whether Anaconda is already installed. If yes, skip this section.
+
-
+```bash
+wget https://github.com/conda-forge/miniforge/releases/download/25.11.0-0/Miniforge3-25.11.0-0-Linux-x86_64.sh
+chmod +x Miniforge3-25.11.0-0-Linux-x86_64.sh
+bash Miniforge3-25.11.0-0-Linux-x86_64.sh
+```
- ```bash
- $ conda --version
- conda 24.9.2
- ```
+
-
+## Create a virtual environment
- If you see "conda: command not found", it means Anaconda is not installed. Please refer to the [Anaconda](https://www.anaconda.com/) official website for installation instructions.
+
- - Create a conda environment
+```bash
+conda create -n rkllm python=3.12
+```
-
+
- ```bash
- conda create -n rkllm python=3.11.11
- ```
+## Activate the virtual environment
-
+
- - Enter the rkllm conda environment
+```bash
+conda activate rkllm
+```
-
+
- ```bash
- conda activate rkllm
- ```
+## Install dependencies
-
+
- - _To exit the environment_
+```bash
+cd rknn-llm/rkllm-toolkit/packages
+pip install rkllm_toolkit-1.2.3-cp312-cp312-linux_x86_64.whl
+```
-
+
- ```bash
- conda deactivate
- ```
+## Verify the installation
-
+If the following commands run without errors, the installation is successful.
-- Clone the RKLLM repository
+
-
+```bash
+$python3
+>>>from rkllm.api import RKLLM
+```
- ```bash
- git clone -b release-v1.2.1b1 https://github.com/airockchip/rknn-llm.git && cd rknn-llm
- ```
+
-
+## Toolchain for building on-device examples
-- Install RKLLM-Toolkit
+To build the on-device runtime examples, you need a cross-compilation toolchain.
- RKLLM-Toolkit is a software development kit that allows users to quantize and convert Huggingface-format LLM models on X86 PCs.
+Download: [Cross compilation toolchain](https://developer.arm.com/-/media/files/downloads/gnu/11.2-2022.02/binrel/gcc-arm-11.2-2022.02-x86_64-aarch64-none-linux-gnu.tar.xz?rev=33c6e30e5ac64e6dba8f0431f2c35f1b&revision=33c6e30e-5ac6-4e6d-ba8f-0431f2c35f1b&hash=632C6C0BD43C3E4B59CA8A09A7055D30).
-
+Extract it after downloading.
- ```bash
- pip3 install ./rkllm-toolkit/rkllm_toolkit-1.2.1b1-cp311-cp311-linux_x86_64.whl
- ```
+Before building, export the compiler path so the scripts can find the toolchain.
-
+
- If no errors occur when executing the following commands, the installation was successful.
+```bash
+export GCC_COMPILER=/path/to/your/gcc/bin/aarch64-linux-gnu
+```
-
+
- ```bash
- $python3
- >>>from rkllm.api import RKLLM
- ```
+## Device driver requirements
-
+RKLLM requires a newer RKNPU driver. Before running RKLLM Runtime on the device, confirm that your RKNPU driver is **v0.9.8**.
-### Development Board
+:::tip
+Radxa OS 6.1 images may ship with RKNPU driver **0.9.6** by default.
+Upgrade via `sudo rsetup` -> `System` -> `System Update` to get RKNPU driver **0.9.8**.
+After upgrading, run **`sudo apt autopurge`** and reboot.
+:::
-- Check whether the RKNPU driver version is at least 0.9.8. If it is lower than this version, download and flash the latest radxa 6.1 firmware.
- :::tip
- The default RKNPU driver version in the radxa 6.1 firmware is 0.9.6. Please update to version 0.9.8 via: `sudo rsetup -> System -> System Update`.
- After the update, be sure to execute **`sudo apt autopurge`** and then reboot.
- :::
+Check the driver version:
-
+
- ```bash
- $ sudo cat /sys/kernel/debug/rknpu/version
- RKNPU driver: v0.9.8
- ```
+```bash
+$ sudo cat /sys/kernel/debug/rknpu/version
+RKNPU driver: v0.9.8
+```
-
+
-- (Optional) Manually compile the NPU kernel
+### Optional: build the NPU driver manually
- If you are using a non-official firmware, you may need to update the kernel. The RKNPU driver supports two main kernel versions: [kernel-5.10](https://github.com/radxa/kernel/tree/stable-5.10-rock5) and [kernel-6.1](https://github.com/radxa/kernel/tree/linux-6.1-stan-rkr1). You can confirm the specific version number in the Makefile at the root directory of the kernel. The specific steps to update the kernel are as follows:
+If you are using a non-official firmware image, you may need to update the kernel.
+The RKNPU driver package supports two major kernel versions:
+[kernel-5.10](https://github.com/radxa/kernel/tree/stable-5.10-rock5) and
+[kernel-6.1](https://github.com/radxa/kernel/tree/linux-6.1-stan-rkr1).
+You can confirm the exact version in the kernel root `Makefile`. The general update steps are:
- 1. Download the archive file [rknpu_driver_0.9.8_20241009.tar.bz2](https://github.com/airockchip/rknn-llm/tree/release-v1.2.1b1/rknpu-driver).
+1. Download [rknpu_driver_0.9.8_20241009.tar.bz2](https://github.com/airockchip/rknn-llm/tree/release-v1.2.1b1/rknpu-driver).
- 2. Extract the archive and replace the rknpu driver code in the current kernel source directory with it.
+2. Extract it and overwrite the `rknpu` driver sources in your kernel tree (see the sketch after this list).
- 3. Recompile the kernel.
+3. Rebuild the kernel.
- 4. Flash the newly compiled kernel to the device.
+4. Flash the newly built kernel to the device.
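+
+A rough sketch of steps 1–2, assuming the kernel source tree lives at `/path/to/kernel` (both paths below are placeholders; the layout of the extracted archive is not verified here):
+
+```bash
+tar -xjf rknpu_driver_0.9.8_20241009.tar.bz2
+# Overwrite the in-tree rknpu driver with the extracted sources
+cp -rf /path/to/extracted/rknpu-driver/* /path/to/kernel/drivers/rknpu/
+```
+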
-- RKLLM Runtime provides C/C++ programming interfaces for the Rockchip NPU platform, helping users deploy RKLLM models and accelerate the implementation of LLM applications. Clone the RKLLM repository on the device side.
+## Clone the repository on the device
-
+RKLLM Runtime provides C/C++ APIs for Rockchip NPUs to help you deploy RKLLM models and accelerate LLM applications.
- ```bash
- git clone -b release-v1.2.1b1 https://github.com/airockchip/rknn-llm.git
- ```
+Clone the RKLLM repository on the device:
-
+
+
+```bash
+git clone -b release-v1.2.3 https://github.com/airockchip/rknn-llm.git
+```
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-usage.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-usage.mdx
index 7b3495ccf..d9bcd225e 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-usage.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm-usage.mdx
@@ -1,6 +1,6 @@
-This document describes how to deploy Huggingface-format large language models onto RK3588 using RKLLM for hardware-accelerated inference on the NPU.
+This document explains how to use RKLLM to deploy Hugging Face-format LLMs to RK3588 and run hardware-accelerated inference on the NPU.
-#### Currently Supported Models
+#### Supported models
- [LLAMA models](https://huggingface.co/meta-llama)
- [TinyLLAMA models](https://huggingface.co/TinyLlama)
@@ -20,20 +20,20 @@ This document describes how to deploy Huggingface-format large language models o
- [Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- [Qwen3](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f)
-We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) as an example and follow the sample scripts provided in the [RKLLM](./rkllm_install#rkllm-installation) repository to fully demonstrate how to deploy a large language model from scratch onto a development board equipped with the RK3588 chip, utilizing the NPU for hardware-accelerated inference.
+This guide uses [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) as an example and follows the demo scripts in the [RKLLM](./rkllm_install) repository to walk through an end-to-end deployment on an RK3588 device with NPU acceleration.
:::tip
-If you have not installed or configured the RKLLM environment yet, please refer to [RKLLM Installation](rkllm_install).
+If you haven't installed and configured RKLLM yet, follow [RKLLM Installation](rkllm_install).
:::
### Model Conversion
:::tip
-For RK358X users, please specify `rk3588` as the TARGET_PLATFORM.
+For RK358x, set `TARGET_PLATFORM` to `rk3588`.
:::
-We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) as an example, but you may choose any model from the list of [currently supported models](#currently-supported-models).
+This section uses [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) as an example. You can also pick any model from the [Supported models](#supported-models) list.
-- Download the weights of [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on your x86 PC workstation. If you haven't installed [git-lfs](https://git-lfs.com/), please install it first.
+- On an x86 Linux PC, download the model weights (install [git-lfs](https://git-lfs.com/) if needed):
@@ -44,7 +44,7 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Activate the rkllm conda environment. You can refer to [RKLLM Conda Installation](rkllm_install#x86-pc-workstation).
+- Activate the `rkllm` conda environment (see [RKLLM conda installation](rkllm_install)):
@@ -54,11 +54,11 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Generate the LLM model quantization calibration file.
+- Generate the quantization calibration file for the LLM
:::tip
- For LLM models, we use the conversion script provided in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export`.
+  For LLM models, this guide uses the conversion scripts under `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export`.
- For VLM models, use the conversion script in `rknn-llm/examples/Qwen2-VL_Demo/export`. For multimodal VLM models, please refer to [RKLLM Qwen2-VL](./rkllm_qwen2_vl).
+ For VLM models, use `rknn-llm/examples/Qwen2-VL_Demo/export`. For multimodal VLM models, see [RKLLM Qwen2-VL](./rkllm_qwen2_vl).
:::
@@ -70,13 +70,13 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
- | Parameter | Required | Description | Options |
- | --------- | -------- | ------------------------------------- | ------- |
- | `path` | Required | Path to the Huggingface model folder. | N |
+ | Parameter | Required | Description | Notes |
+ | --------- | -------- | ---------------------------- | ----- |
+ | `path` | Yes | Hugging Face model directory | N/A |
- The `generate_data_quant.py` script generates the quantization file `data_quant.json` used during model quantization.
+ `generate_data_quant.py` generates `data_quant.json`, which is used during quantization.
-- Update the `modelpath` variable in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py` to point to your model path.
+- Update the `modelpath` in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py`:
@@ -86,11 +86,12 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Adjust the maximum context length `max_context`
+- Adjust `max_context` (optional)
- If you need a specific `max_context` length, modify the value of the `max_context` parameter in the `llm.build` function within `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py`. The default is 4096; larger values consume more memory. It must not exceed 16,384 and must be a multiple of 32 (e.g., 32, 64, 96, ..., 16,384).
+  If you need a different context length, modify `max_context` in the `llm.build` call in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py`.
+ Default is **4096**. Larger values consume more memory. The value must be ≤ **16384** and a multiple of **32** (e.g., 32, 64, 96, …, 16384).
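+
+ As a minimal sketch, the edit can also be made from the shell, assuming the default value appears literally as `max_context=4096` in the `llm.build(...)` call (verify the exact text in your copy of the script first):
+
+ ```bash
+ # Illustrative only: raise max_context from the default 4096 to 8192
+ # (8192 is a multiple of 32 and below the 16384 limit).
+ sed -i 's/max_context=4096/max_context=8192/' \
+     rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/export/export_rkllm.py
+ ```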
-- Run the model conversion script.
+- Run the conversion script
@@ -100,14 +101,15 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
- After successful conversion, you will get an `.rkllm` model file — in this case, `Qwen2.5-1.5B-Instruct_W8A8_RK3588.rkllm`. From the filename, you can see that this model has been quantized using W8A8 and is compatible with the RK3588 platform.
+ After a successful conversion, you should get an RKLLM model such as `Qwen2.5-1.5B-Instruct_W8A8_RK3588.rkllm`.
+ The filename indicates that the model is W8A8-quantized and targets the RK3588 platform.
-### Compiling the Executable
+### Build the executable
-- Download the cross-compilation toolchain [gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu](https://developer.arm.com/downloads/-/gnu-a/10-2-2020-11)
-- Modify the main program code in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/src/llm_demo.cpp`
+- Download the cross-compilation toolchain: [gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu](https://developer.arm.com/downloads/-/gnu-a/10-2-2020-11)
+- Update the main program: `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/src/llm_demo.cpp`
- You need to comment out line 165, since RKLLM automatically parses the `chat_template` field from the tokenizer_config.json file when converting the model, so there's no need to manually set it.
+ Comment out line 165. RKLLM parses the `chat_template` from `tokenizer_config.json` automatically during conversion, so you don't need to set it manually.
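+
+ If you prefer to do this from the shell, a one-liner such as the following works; it simply prefixes line 165 with `//` (the path assumes the default repository layout):
+
+ ```bash
+ # Comment out line 165 of llm_demo.cpp by prepending "// "
+ sed -i '165s|^|// |' rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/src/llm_demo.cpp
+ ```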
@@ -117,7 +119,7 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Update the `GCC_COMPILER_PATH` in the build script `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/build-linux.sh`
+- Update `GCC_COMPILER_PATH` in `rknn-llm/examples/DeepSeek-R1-Distill-Qwen-1.5B_Demo/deploy/build-linux.sh`
@@ -127,7 +129,7 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Run the model conversion script.
+- Build
@@ -138,14 +140,14 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
- The compiled executable will be located in `install/demo_Linux_aarch64`.
+ The generated binaries are located at `install/demo_Linux_aarch64`.
-### Deployment on Device
+### Deploy to the device
-#### Local Terminal Mode
+#### Local terminal mode
-- Copy the converted `.rkllm` model and the compiled `demo_Linux_aarch64` folder to the device.
-- Set up environment variables
+- Copy the converted RKLLM model and the built `demo_Linux_aarch64` folder to the device.
+- Export environment variables:
@@ -155,7 +157,7 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
-- Run `llm_demo`, type `exit` to quit
+- Run `llm_demo` (type `exit` to quit):
@@ -167,103 +169,15 @@ We will use [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Ins
- | Parameter | Required | Description | Options |
- | ----------------- | -------- | ----------------------------------------------- | ---------------------------------------------------------------------------- |
- | `path` | Required | Path to the RKLLM model folder. | N |
- | `max_new_tokens` | Required | Maximum number of tokens to generate per round. | Must be less than or equal to `max_context_len` |
- | `max_context_len` | Required | Maximum context size for the model. | Must be less than or equal to the `max_context` used during model conversion |
+ | Parameter | Required | Description | Notes |
+ | ----------------- | -------- | ------------------------- | ------------------------------ |
+ | `path` | Yes | Path to the RKLLM model | N/A |
+ | `max_new_tokens` | Yes | Max generated tokens/turn | Must be ≤ `max_context_len` |
+ | `max_context_len` | Yes | Max context length | Must be ≤ export `max_context` |
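+
+ For example (values are illustrative; adjust the model path and token limits to your setup):
+
+ ```bash
+ # Usage: ./llm_demo <rkllm_model_path> <max_new_tokens> <max_context_len>
+ ./llm_demo ./Qwen2.5-1.5B-Instruct_W8A8_RK3588.rkllm 2048 4096
+ ```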

-{/* #### Gradio Mode */}
-
-{/* ##### Server Side */}
-
-{/* - Prepare and enter a virtual environment. Please refer to [Using Python Virtual Environments](venv_usage). */}
-{/* - Install gradio */}
-{/* ```bash */}
-{/* pip3 install gradio */}
-{/* ``` */}
-{/* - Copy `librkllmrt.so` into `rkllm_server/lib` */}
-{/* ```bash */}
-{/* cd rkllm-runtime/Linux/librkllm_api/aarch64 */}
-{/* cp rkllm-runtime/Linux/librkllm_api/aarch64/librkllmrt.so ./examples/rkllm_server_demo/rkllm_server/lib */}
-{/* ``` */}
-{/* - Modify gradio_server.py to disable GPU acceleration for prefill */}
-{/* ```python */}
-{/* rknnllm_param.use_gpu = False */}
-{/* ``` */}
-{/* - Start the gradio server */}
-{/* ```bash */}
-{/* cd examples/rkllm_server_demo/rkllm_server */}
-{/* python3 gradio_server.py --target_platform rk3588 --rkllm_model_path your_model_path */}
-{/* ``` */}
-{/* - Access the device IP on port 8080 via browser */}
-{/*  */}
-
-{/* ##### Client Side */}
-
-{/* After enabling the Gradio server on the device, clients on the same network can call the LLM Gradio server using the Gradio API. */}
-
-{/* - Install gradio_client */}
-{/* ```bash */}
-{/* pip3 install gradio_client */}
-{/* ``` */}
-{/* - Update the IP address in chat_api_gradio.py based on your deployment */}
-{/* ```python */}
-{/* # Please update the IP according to your deployment */}
-{/* client = Client("http://192.168.2.209:8080/") */}
-{/* ``` */}
-{/* - Run chat_api_gradio.py */}
-{/* ```bash */}
-{/* cd rknn-llm/rkllm-runtime/examples/rkllm_server_demo */}
-{/* python3 chat_api_gradio.py */}
-{/* ``` */}
-{/*  */}
-
-{/* #### Flask Mode */}
-
-{/* ##### Server Side */}
-
-{/* - Install flask */}
-{/* ```bash */}
-{/* pip3 install flask==2.2.2 Werkzeug==2.2.2 */}
-{/* ``` */}
-{/* - Copy `librkllmrt.so` to `rkllm_server/lib` */}
-{/* ```bash */}
-{/* cd rknn-llm/rkllm-runtime */}
-{/* cp ./runtime//Linux/librkllm_api/aarch64/librkllmrt.so ./examples/rkllm_server_demo/rkllm_server/lib */}
-{/* ``` */}
-{/* - Modify flask_server.py to disable GPU acceleration for prefill */}
-
-{/* ```python */}
-{/* rknnllm_param.use_gpu = False */}
-{/* ``` */}
-
-{/* - Start the flask server on port 8080 */}
-{/* ```bash */}
-{/* cd examples/rkllm_server_demo/rkllm_server */}
-{/* python3 flask_server.py --target_platform rk3588 --rkllm_model_path your_model_path */}
-{/* ``` */}
-{/*  */}
-
-{/* ##### Client Side */}
-
-{/* After enabling the Flask server on the device, clients on the same network can call the server via Flask APIs. When developing custom features, simply follow the structure of this API example to wrap and parse data accordingly. */}
-
-{/* - Update the IP address in chat_api_flask.py based on your deployment */}
-{/* ```python */}
-{/* # Please update the IP according to your deployment */}
-{/* server_url = 'http://192.168.2.209:8080/rkllm_chat' */}
-{/* ``` */}
-{/* - Run chat_api_flask.py */}
-{/* ```bash */}
-{/* cd rknn-llm/rkllm-runtime/examples/rkllm_server_demo */}
-{/* python3 chat_api_flask.py */}
-{/* ``` */}
-{/*  */}
-
-### Performance Comparison for Selected Models
+### Performance comparison (selected models)
| Model | Parameter Size | Chip | Chip Count | Inference Speed |
| --------- | -------------- | ------ | ---------- | --------------- |
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm_qwen2_vl.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm_qwen2_vl.mdx
index 737d46936..e4e48a2bd 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm_qwen2_vl.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/dev/_rkllm_qwen2_vl.mdx
@@ -1,241 +1,229 @@
-[Qwen2-VL](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) is a multi-modal VLM model developed by Alibaba.
-Qwen2-VL can understand images of various resolutions and aspect ratios, comprehend videos longer than 20 minutes, function as an agent for operating mobile devices and robots, and supports multiple languages.
-This document will explain how to deploy the [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) visual multi-modal model on RK3588 using NPU for hardware-accelerated inference.
+[Qwen2-VL](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) is a multimodal vision-language model (VLM) developed by Alibaba.
+It provides strong visual perception, adapts to images of different resolutions and aspect ratios, and supports deeper understanding of long videos (20+ minutes).
+Qwen2-VL also supports multiple languages and can act as an “agent” for tasks such as phone control and robot instruction execution.
+This document explains how to deploy **Qwen2-VL-2B-Instruct** on RK3588 using the RKLLM toolchain and run hardware-accelerated inference on the built-in NPU.

-### Model File Download
+## Quick Start
-:::tip
-Radxa has provided precompiled rkllm models and executables, which users can download and use directly. If you want to refer to the compilation process, please continue with the optional section.
-:::
-
-- Use [git LFS](https://git-lfs.com/) to download the precompiled rkllm from [ModelScope](https://modelscope.cn/models/radxa/DeepSeek-R1-Distill-Qwen-1.5B_RKLLM):
-
-
-
- ```bash
- git lfs install
- git clone https://www.modelscope.cn/radxa/Qwen2-VL-2B-RKLLM.git
- ```
-
-
-
-### (Optional) Model Compilation
-
-:::tip
-Please prepare the RKLLM working environment on both your PC and development board according to [RKLLM Installation](./rkllm_install).
-:::
-:::tip
-For RK358X users, please specify the `rk3588` platform for TARGET_PLATFORM.
-:::
+### Download the demo
-- On x86 PC workstation, download the [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) weight files. If you haven't installed [git-lfs](https://git-lfs.com/), please install it first:
+Download the complete demo from ModelScope.
-
+
- ```bash
- git lfs install
- git clone https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct
- ```
+```bash
+pip install -U modelscope
+modelscope download --model radxa/Qwen2-VL-2B-RKLLM
+```
-
+
-- Activate the rkllm conda environment. You can refer to [RKLLM conda installation](rkllm_install#x86-pc-workstation) for details:
+### Run the Example
-
+
- ```bash
- conda activate rkllm
- ```
+```bash
+cd demo_Linux_aarch64/
+export LD_LIBRARY_PATH=./lib
+./demo demo.jpg ../qwen2_vl_2b_vision_rk3588.rknn ../qwen2-vl-2b-instruct_W8A8_rk3588.rkllm 2048 4096 3 "<|vision_start|>" "<|vision_end|>" "<|image_pad|>"
+```
-
+
-#### Compile Image Decoding Model
+## Full Conversion Workflow
-- Install rknn-toolkit2:
+:::info[Prerequisites]
+Set up the development environment by following [RKNN Installation](./rknn-install) and [RKLLM Installation](./rkllm-install).
-
-
- ```bash
- pip3 install rknn-toolkit2 -i https://mirrors.aliyun.com/pypi/simple
- ```
-
-
-
-- Convert to ONNX
-
- - Generate cu_seqlens and rotary_pos_emb
-
-
-
- ```bash
- python3 export/export_vision.py --step 1 --path /path/to/Qwen2-VL-2B-Instruct/ --batch 1 --height 392 --width 392
- ```
-
-
-
- - Export as ONNX
+RKLLM currently only converts the language model part, so deploying a multimodal model requires converting the vision encoder with the RKNN toolchain.
+:::
-
+### Activate the virtual environment
- ```bash
- python3 export/export_vision.py --step 2 --path /path/to/Qwen2-VL-2B-Instruct/ --batch 1 --height 392 --width 392
- ```
+
-
+```bash
+conda activate rkllm
+pip install -U huggingface_hub
+```
- | Parameter Name | Required | Description | Options |
- | -------------- | -------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
- | `step` | Required | Export step. | 1/2, When `step==1`, only generates cu_seqlens and rotary_pos_emb; when `step==2`, exports ONNX. Must run `step == 1` before `step == 2`. |
- | `path` | Optional | Path to Huggingface model folder. | Default: `Qwen/Qwen2-VL-2B-Instruct` |
- | `batch` | Optional | Batch size | Default: 1 |
- | `height` | Optional | Image height | Default: 392 |
- | `width` | Optional | Image width | Default: 392 |
- | `savepath` | Optional | Save path for RKNN model | Default: `qwen2-vl-2b/qwen2_vl_2b_vision.onnx` |
+
-#### Compile RKLLM Model
+### Download the Model
-- Generate VLM model quantization calibration file:
+
-
+```bash
+cd RK-SDK/rknn-llm/examples/multimodal_model_demo/
+hf download Qwen/Qwen2-VL-2B-Instruct --local-dir ./Qwen2-VL-2B-Instruct
+```
- ```bash
- cd rknn-llm/examples/Qwen2-VL_Demo
- python3 data/make_input_embeds_for_quantize.py --path /path/to/Qwen2-VL-2B-Instruct
- ```
+
-
+### Model Conversion
- | Parameter | Required | Description | Options |
- | --------- | -------- | --------------------------------- | ------- |
- | `path` | Required | Path to Huggingface model folder. | N |
+Generate static positional encodings.
- The generated calibration file is saved in `data/input.json`.
+
-- Modify the maximum context value `max_context`
+```bash
+python export/export_vision_qwen2.py --step 1 --path ./Qwen2-VL-2B-Instruct
+```
- If you need to adjust the `max_context` length, modify the `max_context` parameter in the `llm.build` function interface in `rknn-llm/examples/Qwen2-VL_Demo/export/export_rkllm.py`. Larger values consume more memory. It must not exceed 16,384 and must be a multiple of 32 (e.g., 32, 64, 96, ..., 16,384).
+
-- Run the model conversion script:
+| Parameter | Required | Description | Notes |
+| ---------- | -------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
+| `step` | Yes | Export step | 1/2. When `step==1`, only generates `cu_seqlens` and `rotary_pos_emb`. When `step==2`, exports ONNX (run `step==1` first). |
+| `path` | No | Hugging Face model directory | Default: `Qwen/Qwen2-VL-2B-Instruct` |
+| `batch` | No | Batch size | Default: 1 |
+| `height` | No | Image height | Default: 392 |
+| `width` | No | Image width | Default: 392 |
+| `savepath` | No | Output path for ONNX/RKNN | Default: `qwen2-vl-2b/qwen2_vl_2b_vision.onnx` |
-
+Export the vision module to ONNX.
- ```bash
- python3 export_rkllm.py --path /path/to/Qwen2-VL-2B-Instruct/ --target-platform rk3588 --num_npu_core 3 --quantized_dtype w8a8 --device cuda --savepath ./qwen2-vl-llm_rk3588.rkllm
- ```
+
-
+```bash
+pip install onnx==1.18
+python export/export_vision_qwen2.py --step 2 --path ./Qwen2-VL-2B-Instruct
+```
- | Parameter | Required | Description | Options |
- | ----------------- | -------- | ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
- | `path` | Optional | Path to Huggingface model folder. | Default: `Qwen/Qwen2-VL-2B-Instruct` |
- | `target-platform` | Optional | Target running platform | `rk3588`/`rk3576`/`rk3562`, default: `rk3588` |
- | `num_npu_core` | Optional | Number of NPU cores | For `rk3588`: [1,2,3]; `rk3576`: [1,2]; `rk3562`: [1]. Default: `3` |
- | `quantized_dtype` | Optional | RKLLM quantization type | `rk3588`: “w8a8”, “w8a8_g128”, “w8a8_g256”, “w8a8_g512”; `rk3576`: “w4a16”, “w4a16_g32”, “w4a16_g64”, “w4a16_g128”, “w8a8”; `rk3562`: “w8a8”, “w4a16_g32”, “w4a16_g64”, “w4a16_g128”, “w4a8_g32”. Default: `w8a8` |
- | `device` | Optional | Device used during model conversion | `cpu` or `cuda`. Default: `cpu` |
- | `savepath` | Optional | Save path for RKLLM model | Default: `qwen2_vl_2b_instruct.rkllm` |
+
- The generated RKLLM model is named `qwen2-vl-llm_rk3588.rkllm`.
+Convert the vision module to RKNN.
-### (Optional) Build Executable
+
-- Download the cross-compilation toolchain [gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu](https://developer.arm.com/downloads/-/gnu-a/10-2-2020-11)
+```bash
+conda activate rknn
+python export/export_vision_rknn.py --path /path/to/save/qwen2-vl-vision.onnx --target-platform rk3588
+```
-- Modify the main program `rknn-llm/examples/Qwen2-VL_Demo/deploy/src/main.cpp`
+
- You need to comment out line 179. When converting the model, RKLLM will automatically parse the
- `chat_template` field in the `tokenizer_config.json` file of the Hugging Face model, so there's no need to modify it.
+Generate a quantization calibration file.
-
+
- ```vim
- 179 // rkllm_set_chat_template(llmHandle, "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n", "<|im_start|>user\n", "<|im_end|>\n<|im_start|>assistant\n");
- ```
+```bash
+conda activate rkllm
+python data/make_input_embeds_for_quantize.py --path /path/to/Qwen2-VL-model
+```
-
+
-- Modify the main program `rknn-llm/examples/Qwen2-VL_Demo/deploy/src/llm.cpp`
+| Parameter | Required | Description | Notes |
+| --------- | -------- | ---------------------------- | ----- |
+| `path` | Yes | Hugging Face model directory | N/A |
- You need to comment out line 120. When converting the model, RKLLM will automatically parse the
- `chat_template` field in the `tokenizer_config.json` file of the Hugging Face model, so there's no need to modify it.
+Export the language module to the RKLLM format.
-
+
- ```vim
- 120 // rkllm_set_chat_template(llmHandle, "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n", "<|im_start|>user\n", "<|im_end|>\n<|im_start|>assistant\n");
- ```
+```bash
+python export/export_rkllm.py
+```
-
+
-- Modify the `GCC_COMPILER_PATH` in the compilation script `rknn-llm/examples/Qwen2-VL_Demo/deploy/build-linux.sh`
+| Parameter | Required | Description | Notes |
+| ----------------- | -------- | ----------------------------- | ---------------------------------------------------------------- |
+| `path` | No | Hugging Face model directory | Default: `Qwen/Qwen2-VL-2B-Instruct` |
+| `target-platform` | No | Target platform | `rk3588` / `rk3576` / `rk3562` (default: `rk3588`) |
+| `num_npu_core` | No | NPU core count | `rk3588`: [1,2,3], `rk3576`: [1,2], `rk3562`: [1] (default: `3`) |
+| `quantized_dtype` | No | RKLLM quantization dtype | `rk3588`: w8a8, w8a8_g128/g256/g512; `rk3576`: w4a16, w4a16_g32/g64/g128, w8a8; `rk3562`: w8a8, w4a16_g32/g64/g128, w4a8_g32 (default: `w8a8`) |
+| `device` | No | Device used during conversion | `cpu` / `cuda` (default: `cpu`) |
+| `savepath` | No | Output RKLLM model path | Default: `qwen2_vl_2b_instruct.rkllm` |
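+
+For reference, a fully specified invocation might look like the following; the argument values are examples, and `--device cuda` assumes an NVIDIA GPU is available on the conversion host (use `--device cpu` otherwise):
+
+```bash
+python export/export_rkllm.py \
+    --path ./Qwen2-VL-2B-Instruct \
+    --target-platform rk3588 \
+    --num_npu_core 3 \
+    --quantized_dtype w8a8 \
+    --device cuda \
+    --savepath ./qwen2-vl-2b-instruct_W8A8_rk3588.rkllm
+```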
-
+### Build the executable
- ```vim
- 5 GCC_COMPILER_PATH=/path/to/gcc-arm-10.2-2020.11-x86_64-aarch64-none-linux-gnu/bin/aarch64-none-linux-gnu
- ```
+
-
+```bash
+cd deploy/
+export GCC_COMPILER=/path/to/your/gcc/bin/aarch64-linux-gnu
+bash build-linux.sh
+```
-- Run the model conversion script
+
-
+The generated binaries are located at `install/demo_Linux_aarch64`.
- ```bash
- cd rknn-llm/examples/Qwen2-VL_Demo/deploy
- bash build-linux.sh
- ```
+### Deploy to the device
-
+Copy the converted models and the built `demo_Linux_aarch64` directory to the device.
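+
+For example, over SSH (the hostname, user, and target directory below are placeholders; adjust the source paths to where your converted models were saved):
+
+```bash
+# Copy the vision RKNN model, the language RKLLM model, and the built demo folder
+scp -r qwen2_vl_2b_vision_rk3588.rknn \
+    qwen2-vl-2b-instruct_W8A8_rk3588.rkllm \
+    deploy/install/demo_Linux_aarch64 radxa@<board-ip>:~/qwen2-vl/
+```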
- The generated executable file is located in `install/demo_Linux_aarch64`
+
-### Deploying on Device
+```bash
+cd demo_Linux_aarch64/
+export RKLLM_LOG_LEVEL=1
+export LD_LIBRARY_PATH=./lib
+./demo demo.jpg ../qwen2_vl_2b_vision_rk3588.rknn ../qwen2-vl-2b-instruct_W8A8_rk3588.rkllm 2048 4096 3 "<|vision_start|>" "<|vision_end|>" "<|image_pad|>"
+```
-#### Terminal Mode
+
-- Copy the converted model `qwen2-vl-llm_rk3588.rkllm` and the compiled folder `demo_Linux_aarch64` to the device
-- Set environment variables
+Run the demo (type `exit` to quit). Its parameters are described in the table below:
-
+
- ```bash
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/demo_Linux_aarch64/lib
- ```
+```bash
+./demo demo.jpg ../qwen2_vl_2b_vision_rk3588.rknn ../qwen2-vl-2b-instruct_W8A8_rk3588.rkllm 2048 4096 3 "<|vision_start|>" "<|vision_end|>" "<|image_pad|>"
+```
-
+
- :::tip
- Users who downloaded via ModelScope can directly `export` the `librkllmrt.so` from the downloaded repository.
- :::
+| Parameter | Required | Description | Notes |
+| -------------------- | -------- | -------------------- | ------------------------------------------------------------- |
+| `image_path` | Yes | Image path | N/A |
+| `encoder_model_path` | Yes | Vision encoder RKNN | N/A |
+| `llm_model_path` | Yes | Language model RKLLM | N/A |
+| `max_new_tokens` | Yes | Max generated tokens | Must be ≤ `max_context_len` |
+| `max_context_len` | Yes | Max context length | Must be > `text_token_num + image_token_num + max_new_tokens` |
+| `core_num` | Yes | NPU core count | `rk3588`: [1,2,3], `rk3576`: [1,2], `rk3562`: [1] |
-- Run `llm_demo`, enter `exit` to quit
+```bash
+$ ./demo demo.jpg ../qwen2_vl_2b_vision_rk3588.rknn ../qwen2-vl-2b-instruct_W8A8_rk3588.rkllm 2048 4096 3 "<|vision_start|>" "<|vision_end|>" "<|image_pad|>"
+I rkllm: rkllm-runtime version: 1.2.3, rknpu driver version: 0.9.8, platform: RK3588
+I rkllm: loading rkllm model from ../qwen2-vl-2b-instruct_W8A8_rk3588.rkllm
+I rkllm: rkllm-toolkit version: 1.2.3, max_context_limit: 4096, npu_core_num: 3, target_platform: RK3588, model_dtype: W8A8
+I rkllm: Enabled cpus: [4, 5, 6, 7]
+I rkllm: Enabled cpus num: 4
+I rkllm: Using mrope
+rkllm init success
+main: LLM Model loaded in 3052.79 ms
+===the core num is 3===
+model input num: 1, output num: 1
+input tensors:
+ index=0, name=onnx::Expand_0, n_dims=4, dims=[1, 392, 392, 3], n_elems=460992, size=921984, fmt=NHWC, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+output tensors:
+ index=0, name=4542, n_dims=2, dims=[196, 1536, 0, 0], n_elems=301056, size=602112, fmt=UNDEFINED, type=FP16, qnt_type=AFFINE, zp=0, scale=1.000000
+model input height=392, width=392, channel=3
+main: ImgEnc Model loaded in 2362.74 ms
+main: ImgEnc Model inference took 3762.45 ms
-
+**********************You can choose a preset question or type your own********************
- ```bash
- export RKLLM_LOG_LEVEL=1
- ## Usage: ./demo image_path encoder_model_path llm_model_path max_new_tokens max_context_len core_num
- ./demo demo.jpg ./qwen2_vl_2b_vision_rk3588.rknn ./qwen2-vl-llm_rk3588.rkllm 128 512 3
- ```
+[0] What is in the image?
-
+*************************************************************************
- | Parameter | Required | Description | Options |
- | -------------------- | -------- | ------------------------------------------ | ------------------------------------------------------------- |
- | `image_path` | Required | Path to input image | N/A |
- | `encoder_model_path` | Required | Path to rknn vision encoder model | N/A |
- | `llm_model_path` | Required | Path to rkllm model | N/A |
- | `max_new_tokens` | Required | Max number of tokens to generate per round | Must be ≤ max_context_len |
- | `max_context_len` | Required | Maximum context length for the model | Must be > text-token-num + image-token-num + max_new_tokens |
- | `core_num` | Required | Number of NPU cores to use | For `rk3588`: [1,2,3], For `rk3576`: [1,2], For `rk3562`: [1] |
+user: 0
+What is in the image?
+assistant: The image depicts an astronaut sitting on a chair holding a green bottle, looking at Earth from the Moon with a starry sky in the background.
+```
- 
+Test image:
-### Performance Analysis
+
-On RK3588, up to 15.39 tokens/s,
+Performance:
| Stage | Total Time (ms) | Tokens | Time per Token (ms) | Tokens per Second |
| -------- | --------------- | ------ | ------------------- | ----------------- |
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_llava-1-6-7b.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_llava-1-6-7b.mdx
new file mode 100644
index 000000000..848bcf364
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_llava-1-6-7b.mdx
@@ -0,0 +1,318 @@
+**LLaVA** (Large Language and Vision Assistant) is a multimodal vision-language model (VLM) family developed by researchers from institutions such as the University of Wisconsin–Madison, Microsoft Research, and Columbia University.
+It connects a pre-trained vision encoder with a large language model (LLM) end-to-end, enabling joint image-and-text understanding for tasks such as image captioning, visual question answering, and multimodal chat.
+
+- Key features: uses dynamic-resolution techniques to adapt input resolution to image content, improving recognition of small objects, complex tables, and dense OCR text.
+ The core idea is to map visual features into the language space via a projection layer so the language model can directly understand and reason about visual information.
+- Model variant: **LLaVA 1.6 Vicuna 7B** is a specific model built on a Vicuna 7B language backbone (~7B parameters).
+ Compared to v1.5, it uses more training data and improved visual representations, supporting higher-resolution inputs while maintaining good inference speed.
+
+## Environment Setup
+
+Follow the [llama.cpp](../llama-cpp) document to prepare `llama.cpp`.
+
+## Quick Start
+
+### Download the Model
+
+
+
+```bash
+pip3 install modelscope
+cd llama.cpp
+modelscope download --model radxa/llava-v1.6-vicuna-7b-gguf llava-v1.6-vicuna-7B-Q5_K_M.gguf --local_dir ./
+modelscope download --model radxa/llava-v1.6-vicuna-7b-gguf mmproj-model-f16.gguf --local_dir ./
+```
+
+
+
+### Run the Model
+
+
+
+```bash
+./build/bin/llama-mtmd-cli -m ./llava-v1.6-vicuna-7B-Q5_K_M.gguf --mmproj ./mmproj-model-f16.gguf -p "Describe this image." --image ./tools/mtmd/test-1.jpeg
+```
+
+
+
+## Full Conversion Workflow
+
+### Clone the Model Repository
+
+
+
+```bash
+cd llama.cpp
+git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
+```
+
+
+
+### Create a Virtual Environment
+
+
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip3 install -r requirements.txt
+pip3 install -r tools/mtmd/requirements.txt
+```
+
+
+
+### Model Conversion
+
+#### Split the model
+
+
+
+```bash
+python3 ./tools/mtmd/legacy-models/llava_surgery_v2.py -C -m ./llava-v1.6-vicuna-7b/
+```
+
+
+
+After completion, you should find `llava.projector` and `llava.clip` in the model directory.
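+
+A quick sanity check (paths assume the clone location used above):
+
+```bash
+# Both files should exist after the surgery step
+ls -lh ./llava-v1.6-vicuna-7b/llava.projector ./llava-v1.6-vicuna-7b/llava.clip
+```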
+
+#### Create the `vit` directory
+
+
+
+```bash
+cd ./llava-v1.6-vicuna-7b/
+mkdir vit
+cp ./llava.clip vit/pytorch_model.bin
+cp ./llava.projector vit/
+curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.json -o vit/config.json
+```
+
+
+
+#### Create the vision module
+
+
+
+```bash
+python3 ../tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
+```
+
+
+
+#### Convert the Text Module
+
+
+
+```bash
+python3 ../examples/convert_legacy_llama.py ../llava-v1.6-vicuna-7b/ --skip-unknown
+```
+
+
+
+### Model Quantization
+
+This guide uses **Q5_K_M** quantization.
+
+
+
+```bash
+cd ..
+./build/bin/llama-quantize ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-F32.gguf ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-Q5_K_M.gguf Q5_K_M
+```
+
+
+
+### Model Test
+
+
+

+
+ Test input image
+
+
+
+
+
+```bash
+./build/bin/llama-mtmd-cli -m ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-Q5_K_M.gguf --mmproj ./llava-v1.6-vicuna-7b/vit/mmproj-model-f16.gguf -p "What is this picture about?" --image ./tools/mtmd/test-1.jpeg
+```
+
+
+
+Model output:
+
+```bash
+$ ./build/bin/llama-mtmd-cli -m ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-Q5_K_M.gguf --mmproj ./llava-v1.6-vicuna-7b/vit/mmproj-model-f16.gguf -p "What is this picture about?" --image ./tools/mtmd/test-1.jpeg --chat-template vicuna
+build: 7110 (3ae282a06) with cc (Debian 12.2.0-14+deb12u1) 12.2.0 for aarch64-linux-gnu
+llama_model_loader: loaded meta data with 26 key-value pairs and 291 tensors from ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-Q5_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv 0: general.architecture str = llama
+llama_model_loader: - kv 1: general.name str = Llava v1.6 Vicuna 7b
+llama_model_loader: - kv 2: general.basename str = llava-v1.6-vicuna
+llama_model_loader: - kv 3: general.size_label str = 7.1B
+llama_model_loader: - kv 4: general.tags arr[str,1] = ["image-text-to-text"]
+llama_model_loader: - kv 5: llama.vocab_size u32 = 32000
+llama_model_loader: - kv 6: llama.context_length u32 = 4096
+llama_model_loader: - kv 7: llama.embedding_length u32 = 4096
+llama_model_loader: - kv 8: llama.block_count u32 = 32
+llama_model_loader: - kv 9: llama.feed_forward_length u32 = 11008
+llama_model_loader: - kv 10: llama.rope.dimension_count u32 = 128
+llama_model_loader: - kv 11: llama.attention.head_count u32 = 32
+llama_model_loader: - kv 12: llama.attention.head_count_kv u32 = 32
+llama_model_loader: - kv 13: llama.attention.layer_norm_rms_epsilon f32 = 0.000010
+llama_model_loader: - kv 14: llama.rope.freq_base f32 = 10000.000000
+llama_model_loader: - kv 15: tokenizer.ggml.model str = llama
+llama_model_loader: - kv 16: tokenizer.ggml.tokens arr[str,32000] = ["", "", "", "<0x00>", "<...
+llama_model_loader: - kv 17: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...
+llama_model_loader: - kv 18: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
+llama_model_loader: - kv 19: tokenizer.ggml.bos_token_id u32 = 1
+llama_model_loader: - kv 20: tokenizer.ggml.eos_token_id u32 = 2
+llama_model_loader: - kv 21: tokenizer.ggml.padding_token_id u32 = 0
+llama_model_loader: - kv 22: tokenizer.ggml.add_bos_token bool = true
+llama_model_loader: - kv 23: tokenizer.ggml.add_eos_token bool = false
+llama_model_loader: - kv 24: general.quantization_version u32 = 2
+llama_model_loader: - kv 25: general.file_type u32 = 17
+llama_model_loader: - type f32: 65 tensors
+llama_model_loader: - type q5_K: 193 tensors
+llama_model_loader: - type q6_K: 33 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type = Q5_K - Medium
+print_info: file size = 4.45 GiB (5.68 BPW)
+load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
+load: printing all EOG tokens:
+load: - 2 ('')
+load: special tokens cache size = 3
+load: token to piece cache size = 0.1684 MB
+print_info: arch = llama
+print_info: vocab_only = 0
+print_info: n_ctx_train = 4096
+print_info: n_embd = 4096
+print_info: n_embd_inp = 4096
+print_info: n_layer = 32
+print_info: n_head = 32
+print_info: n_head_kv = 32
+print_info: n_rot = 128
+print_info: n_swa = 0
+print_info: is_swa_any = 0
+print_info: n_embd_head_k = 128
+print_info: n_embd_head_v = 128
+print_info: n_gqa = 1
+print_info: n_embd_k_gqa = 4096
+print_info: n_embd_v_gqa = 4096
+print_info: f_norm_eps = 0.0e+00
+print_info: f_norm_rms_eps = 1.0e-05
+print_info: f_clamp_kqv = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale = 0.0e+00
+print_info: f_attn_scale = 0.0e+00
+print_info: n_ff = 11008
+print_info: n_expert = 0
+print_info: n_expert_used = 0
+print_info: n_expert_groups = 0
+print_info: n_group_used = 0
+print_info: causal attn = 1
+print_info: pooling type = 0
+print_info: rope type = 0
+print_info: rope scaling = linear
+print_info: freq_base_train = 10000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn = 4096
+print_info: rope_finetuned = unknown
+print_info: model type = 7B
+print_info: model params = 6.74 B
+print_info: general.name = Llava v1.6 Vicuna 7b
+print_info: vocab type = SPM
+print_info: n_vocab = 32000
+print_info: n_merges = 0
+print_info: BOS token = 1 ''
+print_info: EOS token = 2 ''
+print_info: UNK token = 0 ''
+print_info: PAD token = 0 ''
+print_info: LF token = 13 '<0x0A>'
+print_info: EOG token = 2 ''
+print_info: max token length = 48
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: CPU_Mapped model buffer size = 4560.87 MiB
+..................................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max = 1
+llama_context: n_ctx = 4096
+llama_context: n_ctx_seq = 4096
+llama_context: n_batch = 2048
+llama_context: n_ubatch = 512
+llama_context: causal_attn = 1
+llama_context: flash_attn = auto
+llama_context: kv_unified = false
+llama_context: freq_base = 10000.0
+llama_context: freq_scale = 1
+llama_context: CPU output buffer size = 0.12 MiB
+llama_kv_cache: CPU KV buffer size = 2048.00 MiB
+llama_kv_cache: size = 2048.00 MiB ( 4096 cells, 32 layers, 1/1 seqs), K (f16): 1024.00 MiB, V (f16): 1024.00 MiB
+llama_context: Flash Attention was auto, set to enabled
+llama_context: CPU compute buffer size = 92.51 MiB
+llama_context: graph nodes = 999
+llama_context: graph splits = 1
+common_init_from_params: added logit bias = -inf
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+Failed to infer a tool call example (possible template bug)
+mtmd_cli_context: chat template example:
+You are a helpful assistant
+
+USER: Hello
+ASSISTANT: Hi there
+USER: How are you?
+ASSISTANT:
+clip_model_loader: model name: vit-large336-custom
+clip_model_loader: description: image encoder for LLaVA
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment: 32
+clip_model_loader: n_tensors: 378
+clip_model_loader: n_kv: 26
+
+clip_model_loader: has vision encoder
+clip_ctx: CLIP using CPU backend
+load_hparams: projector: mlp
+load_hparams: n_embd: 1024
+load_hparams: n_head: 16
+load_hparams: n_ff: 4096
+load_hparams: n_layer: 23
+load_hparams: ffn_op: gelu_quick
+load_hparams: projection_dim: 768
+
+--- vision hparams ---
+load_hparams: image_size: 336
+load_hparams: patch_size: 14
+load_hparams: has_llava_proj: 1
+load_hparams: minicpmv_version: 0
+load_hparams: n_merge: 0
+load_hparams: n_wa_pattern: 0
+
+load_hparams: model size: 595.50 MiB
+load_hparams: metadata size: 0.13 MiB
+load_tensors: ffn up/down are swapped
+alloc_compute_meta: warmup with image size = 336 x 336
+alloc_compute_meta: CPU compute buffer size = 21.55 MiB
+alloc_compute_meta: graph splits = 1, nodes = 736
+warmup: flash attention is enabled
+main: loading model: ./llava-v1.6-vicuna-7b/llava-v1.6-vicuna-7B-Q5_K_M.gguf
+encoding image slice...
+image slice encoded in 9964 ms
+decoding image batch 1/2, n_tokens_batch = 2048
+image decoded (batch 1/2) in 177913 ms
+decoding image batch 2/2, n_tokens_batch = 832
+image decoded (batch 2/2) in 92931 ms
+
+ The image you've provided appears to be a page from The New York Times, dated July 20, 1969. The headline reads "Men Walk on Moon; Astronauts Land on Plain; Collect Rock!" This was a significant event in human history, as it marked the first time humans had set foot on the moon. The article discusses the historic event and the challenges faced by the astronauts during the moon landing. The date of the article is also notable, as it was published just a few days after the Apollo 11 mission, which was the first time humans had landed on the moon.
+
+
+llama_perf_context_print: load time = 1022.67 ms
+llama_perf_context_print: prompt eval time = 282489.36 ms / 2896 tokens ( 97.54 ms per token, 10.25 tokens per second)
+llama_perf_context_print: eval time = 29479.48 ms / 135 runs ( 218.37 ms per token, 4.58 tokens per second)
+llama_perf_context_print: total time = 312180.20 ms / 3031 tokens
+llama_perf_context_print: graphs reused = 134
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-o-2-6.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-o-2-6.mdx
deleted file mode 100644
index 054065140..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-o-2-6.mdx
+++ /dev/null
@@ -1,306 +0,0 @@
-**Qwen2-VL** is an open-source multimodal vision-language model series developed by Alibaba Cloud's Tongyi Qianwen team. This series achieves deep fusion between unified visual encoders and large language model foundations, aiming to provide powerful image understanding, fine-grained reasoning, and open-world dialogue capabilities.
-
-- **Key Features**: The series models generally possess efficient visual-semantic alignment capabilities, supporting precise image content description, complex Q&A, logical reasoning, and multi-turn interactions. Their architecture balances performance and efficiency, showing broad application potential in document analysis, intelligent assistants, and multimodal search scenarios.
-- **Version Note**: This model Qwen2-VL-2B-Instruct is a lightweight practice version of the series with approximately 2 billion parameters, optimized through instruction fine-tuning for deployment in edge and low-resource environments, enabling real-time multimodal interaction.
-
-## Environment Setup
-
-Refer to the [llama.cpp](../../../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) documentation to prepare the llama.cpp tools.
-
-## Quick Start
-
-### Download Model
-
-
-
-```bash
-pip3 install modelscope
-cd llama.cpp
-modelscope download --model radxa/Qwen2-VL-2B-Instruct-NOE mmproj-Qwen2-VL-2b-Instruct-F16.gguf --local_dir ./
-modelscope download --model radxa/Qwen2-VL-2B-Instruct-NOE Qwen2-VL-2B-Instruct-Q5_K_M.gguf --local_dir ./
-modelscope download --model radxa/Qwen2-VL-2B-Instruct-NOE test.png --local_dir ./
-```
-
-
-
-### Run Model
-
-
-
-```bash
-./build/bin/llama-mtmd-cli -m ./Qwen2-VL-2B-Instruct-Q5_K_M.gguf --mmproj ./mmproj-Qwen2-VL-2b-Instruct-F16.gguf -p "Describe this image." --image ./test.png
-```
-
-
-
-## Complete Conversion Workflow
-
-### Clone Model Repository
-
-
-
-```bash
-cd llama.cpp
-git clone https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct
-```
-
-
-
-### Create Virtual Environment
-
-
-
-```bash
-python3 -m venv .venv
-source .venv/bin/activate
-pip3 install -r requirements.txt
-```
-
-
-
-### Model Conversion
-
-#### Convert Text Module
-
-
-
-```bash
-python3 ./convert_hf_to_gguf.py ./Qwen2-VL-2B-Instruct
-```
-
-
-
-#### Convert Vision Module
-
-
-
-```bash
-python3 ./convert_hf_to_gguf.py --mmproj ./Qwen2-VL-2B-Instruct
-```
-
-
-
-### Model Quantization
-
-Here we use Q5_K_M quantization.
-
-
-
-```bash
-./build/bin/llama-quantize ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-F16.gguf ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf Q5_K_M
-```
-
-
-
-### Model Test
-
-
-

-
- Model test input
-
-
-
-
-
-```bash
-./build/bin/llama-mtmd-cli -m ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2-VL-2B-Instruct/mmproj-Qwen2-VL-2b-Instruct-F16.gguf -p "Describe this image." --image ./Qwen2-VL-2B-Instruct/test.png
-```
-
-
-
-Model output:
-
-```bash
-$ ./build/bin/llama-mtmd-cli -m ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2-VL-2B-Instruct/mmproj-Qwen
-2-VL-2b-Instruct-F16.gguf -p "Describe this image." --image ./Qwen2-VL-2B-Instruct/test.png
-build: 7110 (3ae282a06) with cc (Debian 12.2.0-14+deb12u1) 12.2.0 for aarch64-linux-gnu
-llama_model_loader: loaded meta data with 33 key-value pairs and 338 tensors from ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf (version GGUF V3 (latest))
-llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
-llama_model_loader: - kv 0: general.architecture str = qwen2vl
-llama_model_loader: - kv 1: general.type str = model
-llama_model_loader: - kv 2: general.name str = Qwen2 VL 2B Instruct
-llama_model_loader: - kv 3: general.finetune str = Instruct
-llama_model_loader: - kv 4: general.basename str = Qwen2-VL
-llama_model_loader: - kv 5: general.size_label str = 2B
-llama_model_loader: - kv 6: general.license str = apache-2.0
-llama_model_loader: - kv 7: general.base_model.count u32 = 1
-llama_model_loader: - kv 8: general.base_model.0.name str = Qwen2 VL 2B
-llama_model_loader: - kv 9: general.base_model.0.organization str = Qwen
-llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/Qwen/Qwen2-VL-2B
-llama_model_loader: - kv 11: general.tags arr[str,2] = ["multimodal", "image-text-to-text"]
-llama_model_loader: - kv 12: general.languages arr[str,1] = ["en"]
-llama_model_loader: - kv 13: qwen2vl.block_count u32 = 28
-llama_model_loader: - kv 14: qwen2vl.context_length u32 = 32768
-llama_model_loader: - kv 15: qwen2vl.embedding_length u32 = 1536
-llama_model_loader: - kv 16: qwen2vl.feed_forward_length u32 = 8960
-llama_model_loader: - kv 17: qwen2vl.attention.head_count u32 = 12
-llama_model_loader: - kv 18: qwen2vl.attention.head_count_kv u32 = 2
-llama_model_loader: - kv 19: qwen2vl.rope.freq_base f32 = 1000000.000000
-llama_model_loader: - kv 20: qwen2vl.attention.layer_norm_rms_epsilon f32 = 0.000001
-llama_model_loader: - kv 21: qwen2vl.rope.dimension_sections arr[i32,4] = [16, 24, 24, 0]
-llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2
-llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2
-llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
-llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
-llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
-llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645
-llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643
-llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643
-llama_model_loader: - kv 30: tokenizer.chat_template str = {% set image_count = namespace(value=...
-llama_model_loader: - kv 31: general.quantization_version u32 = 2
-llama_model_loader: - kv 32: general.file_type u32 = 17
-llama_model_loader: - type f32: 141 tensors
-llama_model_loader: - type q5_K: 168 tensors
-llama_model_loader: - type q6_K: 29 tensors
-print_info: file format = GGUF V3 (latest)
-print_info: file type = Q5_K - Medium
-print_info: file size = 1.04 GiB (5.80 BPW)
-load: printing all EOG tokens:
-load: - 151643 ('<|endoftext|>')
-load: - 151645 ('<|im_end|>')
-load: special tokens cache size = 14
-load: token to piece cache size = 0.9309 MB
-print_info: arch = qwen2vl
-print_info: vocab_only = 0
-print_info: n_ctx_train = 32768
-print_info: n_embd = 1536
-print_info: n_embd_inp = 1536
-print_info: n_layer = 28
-print_info: n_head = 12
-print_info: n_head_kv = 2
-print_info: n_rot = 128
-print_info: n_swa = 0
-print_info: is_swa_any = 0
-print_info: n_embd_head_k = 128
-print_info: n_embd_head_v = 128
-print_info: n_gqa = 6
-print_info: n_embd_k_gqa = 256
-print_info: n_embd_v_gqa = 256
-print_info: f_norm_eps = 0.0e+00
-print_info: f_norm_rms_eps = 1.0e-06
-print_info: f_clamp_kqv = 0.0e+00
-print_info: f_max_alibi_bias = 0.0e+00
-print_info: f_logit_scale = 0.0e+00
-print_info: f_attn_scale = 0.0e+00
-print_info: n_ff = 8960
-print_info: n_expert = 0
-print_info: n_expert_used = 0
-print_info: n_expert_groups = 0
-print_info: n_group_used = 0
-print_info: causal attn = 1
-print_info: pooling type = -1
-print_info: rope type = 8
-print_info: rope scaling = linear
-print_info: freq_base_train = 1000000.0
-print_info: freq_scale_train = 1
-print_info: n_ctx_orig_yarn = 32768
-print_info: rope_finetuned = unknown
-print_info: mrope sections = [16, 24, 24, 0]
-print_info: model type = 1.5B
-print_info: model params = 1.54 B
-print_info: general.name = Qwen2 VL 2B Instruct
-print_info: vocab type = BPE
-print_info: n_vocab = 151936
-print_info: n_merges = 151387
-print_info: BOS token = 151643 '<|endoftext|>'
-print_info: EOS token = 151645 '<|im_end|>'
-print_info: EOT token = 151645 '<|im_end|>'
-print_info: PAD token = 151643 '<|endoftext|>'
-print_info: LF token = 198 'Ċ'
-print_info: EOG token = 151643 '<|endoftext|>'
-print_info: EOG token = 151645 '<|im_end|>'
-print_info: max token length = 256
-load_tensors: loading model tensors, this can take a while... (mmap = true)
-load_tensors: CPU_Mapped model buffer size = 1067.26 MiB
-....................................................................................
-llama_context: constructing llama_context
-llama_context: n_seq_max = 1
-llama_context: n_ctx = 4096
-llama_context: n_ctx_seq = 4096
-llama_context: n_batch = 2048
-llama_context: n_ubatch = 512
-llama_context: causal_attn = 1
-llama_context: flash_attn = auto
-llama_context: kv_unified = false
-llama_context: freq_base = 1000000.0
-llama_context: freq_scale = 1
-llama_context: n_ctx_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
-llama_context: CPU output buffer size = 0.58 MiB
-llama_kv_cache: CPU KV buffer size = 112.00 MiB
-llama_kv_cache: size = 112.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 56.00 MiB, V (f16): 56.00 MiB
-llama_context: Flash Attention was auto, set to enabled
-llama_context: CPU compute buffer size = 302.75 MiB
-llama_context: graph nodes = 959
-llama_context: graph splits = 1
-common_init_from_params: added <|endoftext|> logit bias = -inf
-common_init_from_params: added <|im_end|> logit bias = -inf
-common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
-common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
-mtmd_cli_context: chat template example:
-<|im_start|>system
-You are a helpful assistant<|im_end|>
-<|im_start|>user
-Hello<|im_end|>
-<|im_start|>assistant
-Hi there<|im_end|>
-<|im_start|>user
-How are you?<|im_end|>
-<|im_start|>assistant
-
-clip_model_loader: model name: Qwen2 VL 2B Instruct
-clip_model_loader: description:
-clip_model_loader: GGUF version: 3
-clip_model_loader: alignment: 32
-clip_model_loader: n_tensors: 520
-clip_model_loader: n_kv: 27
-
-clip_model_loader: has vision encoder
-clip_ctx: CLIP using CPU backend
-load_hparams: Qwen-VL models require at minimum 1024 image tokens to function correctly on grounding tasks
-load_hparams: if you encounter problems with accuracy, try adding --image-min-tokens 1024
-load_hparams: more info: https://github.com/ggml-org/llama.cpp/issues/16842
-
-load_hparams: projector: qwen2vl_merger
-load_hparams: n_embd: 1280
-load_hparams: n_head: 16
-load_hparams: n_ff: 1536
-load_hparams: n_layer: 32
-load_hparams: ffn_op: gelu_quick
-load_hparams: projection_dim: 1536
-
---- vision hparams ---
-load_hparams: image_size: 560
-load_hparams: patch_size: 14
-load_hparams: has_llava_proj: 0
-load_hparams: minicpmv_version: 0
-load_hparams: n_merge: 2
-load_hparams: n_wa_pattern: 0
-load_hparams: image_min_pixels: 6272
-load_hparams: image_max_pixels: 3211264
-
-load_hparams: model size: 1269.94 MiB
-load_hparams: metadata size: 0.18 MiB
-alloc_compute_meta: warmup with image size = 1288 x 1288
-alloc_compute_meta: CPU compute buffer size = 267.08 MiB
-alloc_compute_meta: graph splits = 1, nodes = 1085
-warmup: flash attention is enabled
-main: loading model: ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf
-encoding image slice...
-image slice encoded in 11683 ms
-decoding image batch 1/1, n_tokens_batch = 361
-image decoded (batch 1/1) in 6250 ms
-
-The image depicts a single rose placed on a marble surface, likely a table or a shelf. The rose is positioned in such a way that it is slightly tilted, with its petals facing upwards. The background features a dark, possibly stone or marble, wall with a textured surface, and a window or mirror reflecting the surroundings. The overall composition of the image creates a serene and elegant atmosphere.
-
-
-llama_perf_context_print: load time = 416.66 ms
-llama_perf_context_print: prompt eval time = 18253.30 ms / 375 tokens ( 48.68 ms per token, 20.54 tokens per second)
-llama_perf_context_print: eval time = 5283.83 ms / 78 runs ( 67.74 ms per token, 14.76 tokens per second)
-llama_perf_context_print: total time = 23892.18 ms / 453 tokens
-llama_perf_context_print: graphs reused = 0
-```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-v-2-6.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-v-2-6.mdx
new file mode 100644
index 000000000..29a2d2f30
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_minicpm-v-2-6.mdx
@@ -0,0 +1,311 @@
+**MiniCPM-V** is a multimodal vision-language model (VLM) family developed by ModelBest and the NLP lab at Tsinghua University.
+It focuses on enabling multimodal capabilities on edge devices through model architectures that can process images and respond to text instructions, covering use cases such as image understanding, multi-turn conversations, and video analysis.
+
+- Key features: supports images with different aspect ratios and includes video understanding (summarization and Q&A).
+ It improves pixel-level spatial perception for coordinate grounding and object tracking. The model is optimized for edge deployment and can handle complex tables, long images, and OCR-style text extraction with reduced memory usage.
+- Model variant: **MiniCPM-V 2.6** is a concrete variant in the series with roughly **8B parameters**.
+ It supports single-image, multi-image, and short-video understanding and is suitable for mobile/edge deployments where latency and compute cost matter.
+
+## Environment Setup
+
+Follow the [llama.cpp](../llama-cpp) document to prepare `llama.cpp`.
+
+## Quick Start
+
+### Download the Model
+
+
+
+```bash
+pip3 install modelscope
+cd llama.cpp
+modelscope download --model radxa/minicpm-v-2_6-gguf ggml-model-Q5_K_M.gguf --local_dir ./
+modelscope download --model radxa/minicpm-v-2_6-gguf mmproj-model-f16.gguf --local_dir ./
+```
+
+
+
+### Run the Model
+
+
+
+```bash
+./build/bin/llama-mtmd-cli -m ./ggml-model-Q5_K_M.gguf --mmproj ./mmproj-model-f16.gguf -p "What is this picture about?" --image ./tools/mtmd/test-1.jpeg
+```
+
+
+
+## Full Conversion Workflow
+
+### Clone the Model Repository
+
+
+
+```bash
+cd llama.cpp
+hf download openbmb/MiniCPM-V-2_6 --local-dir ./MiniCPM-V-2_6
+```
+
+
+
+### Create a Virtual Environment
+
+
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip3 install -r requirements.txt
+```
+
+
+
+### Model Conversion
+
+#### Split the model
+
+
+
+```bash
+python3 ./tools/mtmd/legacy-models/minicpmv-surgery.py -m ./MiniCPM-V-2_6
+```
+
+
+
+#### Convert the Vision Module
+
+
+
+```bash
+python3 ./tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py -m ./MiniCPM-V-2_6 --minicpmv-projector ./MiniCPM-V-2_6/minicpmv.projector --output-dir ./MiniCPM-V-2_6/ --minicpmv_version 3
+```
+
+
+
+#### Convert the Text Module
+
+
+
+```bash
+python3 ./convert_hf_to_gguf.py ./MiniCPM-V-2_6/model
+```
+
+
+
+### Model Quantization
+
+This guide uses **Q5_K_M** quantization.
+
+
+
+```bash
+./build/bin/llama-quantize ./MiniCPM-V-2_6/model/Model-7.6B-F16.gguf ./MiniCPM-V-2_6/model/ggml-model-Q5_K_M.gguf Q5_K_M
+```
+
+
+
+### Model Test
+
+
+

+
+ Test input image
+
+
+
+
+
+```bash
+./build/bin/llama-mtmd-cli -m ./MiniCPM-V-2_6/model/ggml-model-Q5_K_M.gguf --mmproj ./MiniCPM-V-2_6/mmproj-model-f16.gguf -p "What is this picture about?" --image ./tools/mtmd/test-1.jpeg
+```
+
+
+
+Model output:
+
+```bash
+$ ./build/bin/llama-mtmd-cli -m ./MiniCPM-V-2_6/model/ggml-model-Q5_K_M.gguf --mmproj ./MiniCPM-V-2_6/mmproj-model-f16.gguf -p "What is this picture about?" --image ./tools/mtmd/test-1.jpeg
+build: 7110 (3ae282a06) with cc (Debian 12.2.0-14+deb12u1) 12.2.0 for aarch64-linux-gnu
+llama_model_loader: loaded meta data with 24 key-value pairs and 339 tensors from ./MiniCPM-V-2_6/model/ggml-model-Q5_K_M.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv 0: general.architecture str = qwen2
+llama_model_loader: - kv 1: general.type str = model
+llama_model_loader: - kv 2: general.name str = Model
+llama_model_loader: - kv 3: general.size_label str = 7.6B
+llama_model_loader: - kv 4: qwen2.block_count u32 = 28
+llama_model_loader: - kv 5: qwen2.context_length u32 = 32768
+llama_model_loader: - kv 6: qwen2.embedding_length u32 = 3584
+llama_model_loader: - kv 7: qwen2.feed_forward_length u32 = 18944
+llama_model_loader: - kv 8: qwen2.attention.head_count u32 = 28
+llama_model_loader: - kv 9: qwen2.attention.head_count_kv u32 = 4
+llama_model_loader: - kv 10: qwen2.rope.freq_base f32 = 1000000.000000
+llama_model_loader: - kv 11: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
+llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
+llama_model_loader: - kv 13: tokenizer.ggml.pre str = qwen2
+llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,151666] = ["!", "\"", "#", "$", "%", "&", "'", ...
+llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,151666] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
+llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
+llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 151644
+llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 151645
+llama_model_loader: - kv 19: tokenizer.ggml.unknown_token_id u32 = 128244
+llama_model_loader: - kv 20: tokenizer.ggml.padding_token_id u32 = 151643
+llama_model_loader: - kv 21: tokenizer.chat_template str = {% for message in messages %}{% if lo...
+llama_model_loader: - kv 22: general.quantization_version u32 = 2
+llama_model_loader: - kv 23: general.file_type u32 = 17
+llama_model_loader: - type f32: 141 tensors
+llama_model_loader: - type q5_K: 169 tensors
+llama_model_loader: - type q6_K: 29 tensors
+print_info: file format = GGUF V3 (latest)
+print_info: file type = Q5_K - Medium
+print_info: file size = 5.06 GiB (5.71 BPW)
+load: printing all EOG tokens:
+load: - 151643 ('<|endoftext|>')
+load: - 151645 ('<|im_end|>')
+load: special tokens cache size = 24
+load: token to piece cache size = 0.9310 MB
+print_info: arch = qwen2
+print_info: vocab_only = 0
+print_info: n_ctx_train = 32768
+print_info: n_embd = 3584
+print_info: n_embd_inp = 3584
+print_info: n_layer = 28
+print_info: n_head = 28
+print_info: n_head_kv = 4
+print_info: n_rot = 128
+print_info: n_swa = 0
+print_info: is_swa_any = 0
+print_info: n_embd_head_k = 128
+print_info: n_embd_head_v = 128
+print_info: n_gqa = 7
+print_info: n_embd_k_gqa = 512
+print_info: n_embd_v_gqa = 512
+print_info: f_norm_eps = 0.0e+00
+print_info: f_norm_rms_eps = 1.0e-06
+print_info: f_clamp_kqv = 0.0e+00
+print_info: f_max_alibi_bias = 0.0e+00
+print_info: f_logit_scale = 0.0e+00
+print_info: f_attn_scale = 0.0e+00
+print_info: n_ff = 18944
+print_info: n_expert = 0
+print_info: n_expert_used = 0
+print_info: n_expert_groups = 0
+print_info: n_group_used = 0
+print_info: causal attn = 1
+print_info: pooling type = -1
+print_info: rope type = 2
+print_info: rope scaling = linear
+print_info: freq_base_train = 1000000.0
+print_info: freq_scale_train = 1
+print_info: n_ctx_orig_yarn = 32768
+print_info: rope_finetuned = unknown
+print_info: model type = 7B
+print_info: model params = 7.61 B
+print_info: general.name = Model
+print_info: vocab type = BPE
+print_info: n_vocab = 151666
+print_info: n_merges = 151387
+print_info: BOS token = 151644 '<|im_start|>'
+print_info: EOS token = 151645 '<|im_end|>'
+print_info: EOT token = 151645 '<|im_end|>'
+print_info: UNK token = 128244 ''
+print_info: PAD token = 151643 '<|endoftext|>'
+print_info: LF token = 198 'Ċ'
+print_info: EOG token = 151643 '<|endoftext|>'
+print_info: EOG token = 151645 '<|im_end|>'
+print_info: max token length = 256
+load_tensors: loading model tensors, this can take a while... (mmap = true)
+load_tensors: CPU_Mapped model buffer size = 5184.87 MiB
+......................................................................................
+llama_context: constructing llama_context
+llama_context: n_seq_max = 1
+llama_context: n_ctx = 4096
+llama_context: n_ctx_seq = 4096
+llama_context: n_batch = 2048
+llama_context: n_ubatch = 512
+llama_context: causal_attn = 1
+llama_context: flash_attn = auto
+llama_context: kv_unified = false
+llama_context: freq_base = 1000000.0
+llama_context: freq_scale = 1
+llama_context: n_ctx_seq (4096) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
+llama_context: CPU output buffer size = 0.58 MiB
+llama_kv_cache: CPU KV buffer size = 224.00 MiB
+llama_kv_cache: size = 224.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB
+llama_context: Flash Attention was auto, set to enabled
+llama_context: CPU compute buffer size = 303.22 MiB
+llama_context: graph nodes = 959
+llama_context: graph splits = 1
+common_init_from_params: added <|endoftext|> logit bias = -inf
+common_init_from_params: added <|im_end|> logit bias = -inf
+common_init_from_params: setting dry_penalty_last_n to ctx_size = 4096
+common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
+mtmd_cli_context: chat template example:
+<|im_start|>system
+You are a helpful assistant<|im_end|>
+<|im_start|>user
+Hello<|im_end|>
+<|im_start|>assistant
+Hi there<|im_end|>
+<|im_start|>user
+How are you?<|im_end|>
+<|im_start|>assistant
+
+clip_model_loader: model name:
+clip_model_loader: description: image encoder for MiniCPM-V
+clip_model_loader: GGUF version: 3
+clip_model_loader: alignment: 32
+clip_model_loader: n_tensors: 455
+clip_model_loader: n_kv: 20
+
+clip_model_loader: has vision encoder
+clip_ctx: CLIP using CPU backend
+load_hparams: projector: resampler
+load_hparams: n_embd: 1152
+load_hparams: n_head: 16
+load_hparams: n_ff: 4304
+load_hparams: n_layer: 27
+load_hparams: ffn_op: gelu
+load_hparams: projection_dim: 0
+
+--- vision hparams ---
+load_hparams: image_size: 448
+load_hparams: patch_size: 14
+load_hparams: has_llava_proj: 0
+load_hparams: minicpmv_version: 3
+load_hparams: n_merge: 0
+load_hparams: n_wa_pattern: 0
+
+load_hparams: model size: 996.02 MiB
+load_hparams: metadata size: 0.16 MiB
+load_tensors: ffn up/down are swapped
+alloc_compute_meta: warmup with image size = 448 x 448
+alloc_compute_meta: CPU compute buffer size = 55.81 MiB
+alloc_compute_meta: graph splits = 1, nodes = 893
+warmup: flash attention is enabled
+main: loading model: ./MiniCPM-V-2_6/model/ggml-model-Q5_K_M.gguf
+encoding image slice...
+image slice encoded in 5523 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 5046 ms
+encoding image slice...
+image slice encoded in 5550 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 5063 ms
+encoding image slice...
+image slice encoded in 5540 ms
+decoding image batch 1/1, n_tokens_batch = 64
+image decoded (batch 1/1) in 5083 ms
+
+The image is a black and white photograph of a newspaper, specifically the front page of The New York Times from July 21, 1969. The headline, written in bold, large font, proclaims "Men Walk on Moon". Below the headline, there's a subheading that reads "Astronauts Land on Plain; Collect Rocks, Plant Flag". The newspaper is open to a page with a photograph of a man walking on the moon. The man is wearing a spacesuit and is seen walking on the lunar surface. The photograph is accompanied by a caption that reads "A Powdery Surface Is Closely Explored". The newspaper is a significant historical artifact, marking the momentous event of the first human landing on the moon.
+
+
+llama_perf_context_print: load time = 936.29 ms
+llama_perf_context_print: prompt eval time = 33699.48 ms / 212 tokens ( 158.96 ms per token, 6.29 tokens per second)
+llama_perf_context_print: eval time = 27574.13 ms / 156 runs ( 176.76 ms per token, 5.66 tokens per second)
+llama_perf_context_print: total time = 61703.52 ms / 368 tokens
+llama_perf_context_print: graphs reused = 154
+```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_mobilenet-v2-int8.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_mobilenet-v2-int8.mdx
new file mode 100644
index 000000000..2dc9d76fb
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_mobilenet-v2-int8.mdx
@@ -0,0 +1,128 @@
+**MobileNet** is a lightweight deep neural network family designed by Google for mobile and embedded devices.
+By building on depthwise separable convolutions, it sharply reduces parameter count and compute cost (see the sketch after the list below), enabling real-time vision workloads on resource-constrained devices such as smartphones and IoT terminals.
+
+- Key features: efficient image classification, object detection, and semantic segmentation with low latency.
+- Variant: this guide uses **MobileNetV2 Int8**, which balances accuracy and efficiency and is well-suited for real-time edge deployments.
+
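+As a rough illustration of why this design is so compact, the short sketch below (an illustrative example, not part of the official workflow; the channel sizes are arbitrary) compares the parameter count of one standard 3x3 convolution with a depthwise separable one:
+
+```python
+# Rough parameter-count comparison for a single 3x3 convolution layer.
+# The channel sizes are arbitrary example values, not MobileNetV2's exact layers.
+k, c_in, c_out = 3, 256, 256
+
+standard = k * k * c_in * c_out            # ordinary 3x3 convolution
+separable = k * k * c_in + c_in * c_out    # depthwise 3x3 + pointwise 1x1
+
+print(f"standard:  {standard:,} parameters")   # 589,824
+print(f"separable: {separable:,} parameters")  # 67,840 (~8.7x fewer)
+```
+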
+:::info[Environment setup]
+Make sure the required environment is ready:
+
+- [Environment setup](../../../../orion/o6/app-development/artificial-intelligence/env-setup.md)
+- [AI Model Hub](../../../../orion/o6/app-development/artificial-intelligence/ai-hub.md)
+ :::
+
+## Quick Start
+
+### Download the model files
+
+
+
+```bash
+cd ai_model_hub_25_Q3/models/ComputeVision/Image_Classification/onnx_mobilenet_v2_12_int8/model
+wget https://modelscope.cn/models/cix/ai_model_hub_25_Q3/resolve/master/models/ComputeVision/Image_Classification/onnx_mobilenet_v2_12_int8/model/mobilenetv2-12-int8-fix.onnx
+```
+
+
+
+### Test the model
+
+:::info
+Activate your virtual environment first, and run the script from the example's root directory (the directory that contains `inference_onnx.py`, one level above `model/`).
+:::
+
+
+
+```bash
+python3 inference_onnx.py --EP NPU
+```
+
+
+
+## Full Conversion Workflow
+
+### Download the model files
+
+
+
+```bash
+cd ai_model_hub_25_Q3/models/ComputeVision/Image_Classification/onnx_mobilenet_v2_12_int8/model
+wget https://modelscope.cn/models/cix/ai_model_hub_25_Q3/resolve/master/models/ComputeVision/Image_Classification/onnx_mobilenet_v2_12_int8/model/mobilenetv2-12-int8.onnx
+```
+
+
+
+### Project structure
+
+```txt
+├── inference_onnx.py
+├── model
+├── ReadMe.md
+└── test_data
+```
+
+### Fix the model input shape
+
+
+
+```bash
+python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param batch_size --dim_value 1 mobilenetv2-12-int8.onnx mobilenetv2-12-int8-fix.onnx
+```
+
+
+
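+If you want to double-check the result, the optional sketch below (an extra verification step, not part of the vendor workflow) prints the input shape of the fixed model with the `onnx` Python package; after the fix, the batch dimension should be the literal value 1 instead of the symbolic `batch_size`:
+
+```python
+# Optional check: print the input shape of the fixed ONNX model.
+# Requires the `onnx` package (pip3 install onnx) and the file produced above.
+import onnx
+
+model = onnx.load("mobilenetv2-12-int8-fix.onnx")
+for inp in model.graph.input:
+    dims = [d.dim_param or d.dim_value for d in inp.type.tensor_type.shape.dim]
+    print(inp.name, dims)  # typically something like: input [1, 3, 224, 224]
+```
+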
+:::info[Copy to the device]
+After the conversion, copy the converted model file (`mobilenetv2-12-int8-fix.onnx`) to the device.
+:::
+
+### Test inference on the host
+
+
+
+```bash
+python3 inference_onnx.py --EP CPU
+```
+
+
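+For reference, the minimal sketch below shows roughly what such an inference script does with plain ONNX Runtime on the CPU. It is an illustrative example rather than the vendor's `inference_onnx.py`: the model path, test image, 224x224 input size, and ImageNet-style preprocessing are assumptions that may differ from the official script.
+
+```python
+# Minimal CPU-only classification sketch with ONNX Runtime (for illustration).
+# Requires numpy, onnxruntime, and Pillow; run from the example root directory.
+import numpy as np
+import onnxruntime as ort
+from PIL import Image
+
+sess = ort.InferenceSession("model/mobilenetv2-12-int8-fix.onnx",
+                            providers=["CPUExecutionProvider"])
+inp = sess.get_inputs()[0]
+
+img = Image.open("test_data/ILSVRC2012_val_00037133.JPEG").convert("RGB").resize((224, 224))
+x = np.asarray(img, dtype=np.float32) / 255.0
+x = (x - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]  # ImageNet mean/std
+x = x.transpose(2, 0, 1)[None].astype(np.float32)        # NCHW, batch of 1
+
+logits = sess.run(None, {inp.name: x})[0]
+print("predicted ImageNet class index:", int(np.argmax(logits)))
+```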
+
+### Deploy on the NPU
+
+#### Export environment variables
+
+
+
+```bash
+export LD_LIBRARY_PATH=/usr/share/cix/lib/onnxruntime:$LD_LIBRARY_PATH
+export OPERATOR_PATH=/usr/share/cix/lib/onnxruntime/operator/
+```
+
+
+
+#### Run the inference script
+
+
+
+```bash
+python3 inference_onnx.py --EP NPU
+```
+
+
+
+#### Inference result
+
+
+
+```bash
+$ python3 ./inference_onnx.py --EP npu
+image path : ./test_data/ILSVRC2012_val_00037133.JPEG
+ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
+image path : ./test_data/ILSVRC2012_val_00021564.JPEG
+coucal
+image path : ./test_data/ILSVRC2012_val_00024154.JPEG
+Ibizan hound, Ibizan Podenco
+image path : ./test_data/ILSVRC2012_val_00002899.JPEG
+rock python, rock snake, Python sebae
+image path : ./test_data/ILSVRC2012_val_00045790.JPEG
+Yorkshire terrier
+```
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2-5-vl-3b.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2-5-vl-3b.mdx
index abe6a47e6..32db2c7f6 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2-5-vl-3b.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2-5-vl-3b.mdx
@@ -1,15 +1,18 @@
-**Qwen2.5-VL** is a multimodal vision-language model series developed by Alibaba Cloud's Tongyi Qianwen team. Building on the advantages of its predecessor, this series further strengthens deep understanding of dynamic video, precise parsing of ultra-long documents, and logical reasoning capabilities in complex scenarios, committed to providing more universal visual interaction experiences.
+**Qwen2.5-VL** is a multimodal vision-language model (VLM) family developed by the Qwen team (Alibaba Cloud).
+Building on the strengths of the previous generation, it improves long-video understanding, ultra-long document parsing, and logical reasoning in complex scenes, aiming to provide more general and practical visual interaction capabilities.
-- **Key Features**: The series models possess excellent visual perception and alignment capabilities, capable of processing high-resolution images and video inputs of over 1 hour. Their standout advantage lies in enhanced "Visual Agent" capabilities, supporting precise coordinate perception, UI interface interaction, and complex structured data extraction, demonstrating powerful performance in automated task processing, multimodal search, and high-precision visual Q&A.
-- **Version Note**: This model Qwen2.5-VL-3B-Instruct is a medium-quantized practice version of the series with approximately 3 billion parameters, having undergone strict instruction fine-tuning. It achieves excellent balance between model performance and computational cost, retaining strong multimodal reasoning capabilities while ensuring deployment flexibility, widely suitable for edge devices, real-time interactive applications, and various low-resource development environments.
+- Key features: strong visual perception and alignment, supporting high-resolution images and video inputs longer than 1 hour.
+ A major highlight is the improved “Visual Agent” capability, enabling accurate coordinate grounding, UI interaction, and complex structured data extraction for automation workflows, multimodal search, and high-accuracy visual Q&A.
+- Model variant: **Qwen2.5-VL-3B-Instruct** is a mid-sized (~3B parameters) instruction-tuned model.
+ It provides an excellent balance between capability and compute cost, making it suitable for edge devices, real-time interactive applications, and low-resource development environments.
## Environment Setup
-Refer to the [llama.cpp](../../../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) documentation to prepare the llama.cpp tools.
+Follow the [llama.cpp](../llama-cpp) document to prepare `llama.cpp`.
## Quick Start
-### Download Model
+### Download the Model
@@ -23,7 +26,7 @@ modelscope download --model radxa/Qwen2.5-VL-3B-Instruct-NOE test.png --local_di
-### Run Model
+### Run the Model
@@ -33,9 +36,9 @@ modelscope download --model radxa/Qwen2.5-VL-3B-Instruct-NOE test.png --local_di
-## Complete Conversion Workflow
+## Full Conversion Workflow
-### Clone Model Repository
+### Clone the Model Repository
@@ -46,7 +49,7 @@ git clone https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct
-### Create Virtual Environment
+### Create a Virtual Environment
@@ -60,7 +63,7 @@ pip3 install -r requirements.txt
### Model Conversion
-#### Convert Text Module
+#### Convert the Text Module
@@ -70,7 +73,7 @@ python3 ./convert_hf_to_gguf.py ./Qwen2.5-VL-3B-Instruct
-#### Convert Vision Module
+#### Convert the Vision Module
@@ -82,7 +85,7 @@ python3 ./convert_hf_to_gguf.py --mmproj ./Qwen2.5-VL-3B-Instruct
### Model Quantization
-Here we use Q5_K_M quantization.
+This guide uses **Q5_K_M** quantization.
@@ -101,14 +104,14 @@ Here we use Q5_K_M quantization.
style={{ display: "block", margin: "0 auto" }}
/>
- Model test input
+ Test input image
```bash
-./build/bin/llama-mtmd-cli -m ./Qwen2.5-VL-3B-Instruct/Qwen2.5-VL-3B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2.5-VL-3B-Instruct/mmproj-Qwen2.5-VL-3b-Instruct-F16.gguf -p "Describe this image." --image ./Qwen2.5-VL-3B-Instruct/test.png
+./build/bin/llama-mtmd-cli -m ./Qwen2.5-VL-3B-Instruct/Qwen2.5-VL-3B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2.5-VL-3B-Instruct/mmproj-Qwen2.5-VL-3b-Instruct-F16.gguf -p "Describe this image." --image ./test.png
```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2vl-2b.mdx b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2vl-2b.mdx
index ed09d981d..8c0cf4121 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2vl-2b.mdx
+++ b/i18n/en/docusaurus-plugin-content-docs/current/common/orion-common/app-dev/artificial-intelligence/_qwen2vl-2b.mdx
@@ -1,15 +1,17 @@
-**Qwen2-VL** is an open-source multimodal vision-language model series developed by Alibaba Cloud's Tongyi Qianwen team. This series achieves deep fusion between unified visual encoders and large language model foundations, aiming to provide powerful image understanding, fine-grained reasoning, and open-world dialogue capabilities.
+**Qwen2-VL** is an open-source multimodal vision-language model (VLM) family developed by the Qwen team (Alibaba Cloud).
+It fuses a unified vision encoder with a large language model backbone to enable strong image understanding, fine-grained reasoning, and open-world conversations.
-- **Key Features**: The series models generally possess efficient visual-semantic alignment capabilities, supporting precise image content description, complex Q&A, logical reasoning, and multi-turn interactions. Their architecture balances performance and efficiency, showing broad application potential in document analysis, intelligent assistants, and multimodal search scenarios.
-- **Version Note**: This model Qwen2-VL-2B-Instruct is a lightweight practice version of the series with approximately 2 billion parameters, optimized through instruction fine-tuning for deployment in edge and low-resource environments, enabling real-time multimodal interaction.
+- Key features: strong vision-language alignment for accurate image captioning, complex Q&A, logical reasoning, and multi-turn interactions.
+ The design balances performance and efficiency, making it suitable for document analysis, AI assistants, and multimodal search.
+- Model variant: **Qwen2-VL-2B-Instruct** is a lightweight, instruction-tuned variant (~2B parameters), well suited to real-time multimodal interaction on edge devices and in low-resource environments.
## Environment Setup
-Refer to the [llama.cpp](../../../../orion/o6/app-development/artificial-intelligence/llama_cpp.md) documentation to prepare the llama.cpp tools.
+Follow the [llama.cpp](../llama-cpp) document to prepare `llama.cpp`.
## Quick Start
-### Download Model
+### Download the Model
@@ -23,7 +25,7 @@ modelscope download --model radxa/Qwen2-VL-2B-Instruct-NOE test.png --local_dir
-### Run Model
+### Run the Model
@@ -33,9 +35,9 @@ modelscope download --model radxa/Qwen2-VL-2B-Instruct-NOE test.png --local_dir
-## Complete Conversion Workflow
+## Full Conversion Workflow
-### Clone Model Repository
+### Clone the Model Repository
@@ -46,7 +48,7 @@ git clone https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct
-### Create Virtual Environment
+### Create a Virtual Environment
@@ -60,7 +62,7 @@ pip3 install -r requirements.txt
### Model Conversion
-#### Convert Text Module
+#### Convert the Text Module
@@ -70,7 +72,7 @@ python3 ./convert_hf_to_gguf.py ./Qwen2-VL-2B-Instruct
-#### Convert Vision Module
+#### Convert the Vision Module
@@ -82,7 +84,7 @@ python3 ./convert_hf_to_gguf.py --mmproj ./Qwen2-VL-2B-Instruct
### Model Quantization
-Here we use Q5_K_M quantization.
+This guide uses **Q5_K_M** quantization.
@@ -101,14 +103,14 @@ Here we use Q5_K_M quantization.
style={{ display: "block", margin: "0 auto" }}
/>
- Model test input
+ Test input image
```bash
-./build/bin/llama-mtmd-cli -m ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2-VL-2B-Instruct/mmproj-Qwen2-VL-2b-Instruct-F16.gguf -p "Describe this image." --image ./Qwen2-VL-2B-Instruct/test.png
+./build/bin/llama-mtmd-cli -m ./Qwen2-VL-2B-Instruct/Qwen2-VL-2B-Instruct-Q5_K_M.gguf --mmproj ./Qwen2-VL-2B-Instruct/mmproj-Qwen2-VL-2b-Instruct-F16.gguf -p "Describe this image." --image ./test.png
```
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/README.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/README.md
index 7d1ee38af..2ed292e98 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/README.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/README.md
@@ -4,6 +4,6 @@ sidebar_position: 6
# Multimodal Models
-This section mainly demonstrates the deployment of some representative multimodal models on Radxa Orion O6 / O6N.
+This section demonstrates how to deploy representative multimodal models on the Radxa Orion O6 / O6N.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
new file mode 100644
index 000000000..a558b0e1a
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 3
+---
+
+import LLaVA from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_llava-1-6-7b.mdx';
+
+# LLaVA 1.6
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
new file mode 100644
index 000000000..c4c4ecf92
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 4
+---
+
+import MiniCPM from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_minicpm-v-2-6.mdx';
+
+# MiniCPM-V 2.6
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
index 93f3d4ddc..6513c9f9f 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
@@ -1,5 +1,5 @@
---
-sidebar-position: 2
+sidebar_position: 2
---
import Qwen2_5vl from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_qwen2-5-vl-3b.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
index 9f11303b5..dd31fb473 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
@@ -1,5 +1,5 @@
---
-sidebar-position: 1
+sidebar_position: 1
---
import Qwen2_vl from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_qwen2vl-2b.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
new file mode 100644
index 000000000..ed01440c0
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 2
+---
+
+import Mobilenet_V2 from "../../../../../common/orion-common/app-dev/artificial-intelligence/\_mobilenet-v2-int8.mdx";
+
+# MobileNetV2 Int8
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/README.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/README.md
index 7d1ee38af..2ed292e98 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/README.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/README.md
@@ -4,6 +4,6 @@ sidebar_position: 6
# Multimodal Models
-This section mainly demonstrates the deployment of some representative multimodal models on Radxa Orion O6 / O6N.
+This section demonstrates how to deploy representative multimodal models on the Radxa Orion O6 / O6N.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
new file mode 100644
index 000000000..a558b0e1a
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/llava-1-6-7b.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 3
+---
+
+import LLaVA from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_llava-1-6-7b.mdx';
+
+# LLaVA 1.6
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
new file mode 100644
index 000000000..c4c4ecf92
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/minicpm-v-2-6.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 4
+---
+
+import MiniCPM from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_minicpm-v-2-6.mdx';
+
+# MiniCPM-V 2.6
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
index 93f3d4ddc..6513c9f9f 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2-5-vl-3b.md
@@ -1,5 +1,5 @@
---
-sidebar-position: 2
+sidebar_position: 2
---
import Qwen2_5vl from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_qwen2-5-vl-3b.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
index 9f11303b5..dd31fb473 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Multimodality/qwen2vl-2b.md
@@ -1,5 +1,5 @@
---
-sidebar-position: 1
+sidebar_position: 1
---
import Qwen2_vl from '../../../../../common/orion-common/app-dev/artificial-intelligence/\_qwen2vl-2b.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
new file mode 100644
index 000000000..ed01440c0
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/orion/o6n/app-development/artificial-intelligence/Vision/mobilenet-v2-int8.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 2
+---
+
+import Mobilenet_V2 from "../../../../../common/orion-common/app-dev/artificial-intelligence/\_mobilenet-v2-int8.mdx";
+
+# MobileNetV2 Int8
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/README.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/README.md
index 325ebf399..8235de252 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/README.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/README.md
@@ -2,8 +2,8 @@
sidebar_position: 4
---
-# Application development
+# Application Development
-Introduces upper-layer application development, such as QT, WiringX, Mraa, etc.
+This section covers application development on the platform, such as Qt, WiringX, Mraa, and more.
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/README.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/README.md
new file mode 100644
index 000000000..1310c75e7
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/README.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 50
+---
+
+# Artificial Intelligence
+
+This section introduces AI applications, including the RKNN and RKLLM toolchains, the RKNN Model Zoo examples, and common model deployment workflows.
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/clip.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/clip.md
new file mode 100644
index 000000000..23fdf5d62
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/clip.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 25
+---
+
+import CLIP from "../../../../common/ai/rockchip/\_clip.mdx"
+
+# CLIP
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/deeplabv3.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/deeplabv3.md
new file mode 100644
index 000000000..a5c8b96bb
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/deeplabv3.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 19
+---
+
+import Deeplabv3 from "../../../../common/ai/rockchip/\_deeplabv3.mdx"
+
+# DeepLabV3
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/lprnet.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/lprnet.md
new file mode 100644
index 000000000..221781f69
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/lprnet.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 21
+---
+
+import LPRNet from "../../../../common/ai/rockchip/\_lprnet.mdx"
+
+# LPRNet
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilenet.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilenet.md
new file mode 100644
index 000000000..e919b6320
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilenet.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 24
+---
+
+import MobileNet from "../../../../common/ai/rockchip/\_mobilenet.mdx"
+
+# MobileNet
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilesam.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilesam.md
new file mode 100644
index 000000000..cb19e09bb
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/mobilesam.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 18
+---
+
+import Mobilesam from "../../../../common/ai/rockchip/\_mobilesam.mdx"
+
+# MobileSAM
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppocr.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppocr.md
new file mode 100644
index 000000000..22a7d1d3f
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppocr.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 20
+---
+
+import PPOCR from "../../../../common/ai/rockchip/\_ppocr.mdx"
+
+# PP-OCR
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppyoloe.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppyoloe.md
new file mode 100644
index 000000000..38e6b07a3
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/ppyoloe.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 15
+---
+
+import PPYOLOE from "../../../../common/ai/rockchip/\_ppyoloe.mdx"
+
+# PP-YOLOE
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/resnet.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/resnet.md
new file mode 100644
index 000000000..14a922eda
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/resnet.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 23
+---
+
+import ResNet from "../../../../common/ai/rockchip/\_resnet.mdx"
+
+# ResNet
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/retinaface.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/retinaface.md
new file mode 100644
index 000000000..2c00f1789
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/retinaface.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 22
+---
+
+import Retinaface from "../../../../common/ai/rockchip/\_retinaface.mdx"
+
+# RetinaFace
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-deepseek-r1.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-deepseek-r1.md
new file mode 100644
index 000000000..289de7aa7
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-deepseek-r1.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 12
+---
+
+# RKLLM DeepSeek-R1
+
+import RKLLMDEEPSEEKR1 from '../../../../common/dev/\_rkllm-deepseek-r1.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-install.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-install.md
new file mode 100644
index 000000000..2fd23ebf6
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-install.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 3
+---
+
+# RKLLM Installation
+
+import Rkllminstall from '../../../../common/dev/\_rkllm-install.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-qwen2-vl.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-qwen2-vl.md
new file mode 100644
index 000000000..fb4435d10
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-qwen2-vl.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 13
+description: "Run the Qwen2-VL multimodal model with RKLLM"
+---
+
+# RKLLM Qwen2-VL
+
+import RKLLMQWEN2VL from '../../../../common/dev/\_rkllm_qwen2_vl.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-usage.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-usage.md
new file mode 100644
index 000000000..9a13b9cb9
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rkllm-usage.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 4
+---
+
+# RKLLM Usage
+
+import Rkllmusage from '../../../../common/dev/\_rkllm-usage.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-custom-yolo.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-custom-yolo.md
new file mode 100644
index 000000000..a1f0feb23
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-custom-yolo.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 10
+description: "Convert a custom-trained YOLO model"
+---
+
+# Convert a Custom-Trained YOLO Model
+
+import RKNNCUSTOMYOLO from '../../../../common/ai/\_rknn_custom_yolo.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-install.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-install.md
new file mode 100644
index 000000000..f61b60371
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-install.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 1
+description: "Install RKNN to run AI models efficiently on Rockchip NPUs"
+---
+
+# RKNN Installation
+
+import RKNNinstall from '../../../../common/dev/\_rknn-install.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-model-zoo.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-model-zoo.md
new file mode 100644
index 000000000..38b2628de
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-model-zoo.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 2
+---
+
+# RKNN Model Zoo
+
+import RKNNModelZoo from "../../../../common/ai/rockchip/\_rknn_model_zoo.mdx"
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-quick-start.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-quick-start.md
new file mode 100644
index 000000000..07e6d4b1f
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-quick-start.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 5
+---
+
+# RKNN Quick Start
+
+import Rknntoolkit2 from '../../../../common/dev/\_rknn-toolkit2.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-stable-diffusion.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-stable-diffusion.md
new file mode 100644
index 000000000..37ca531d6
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-stable-diffusion.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 11
+description: "Convert Stable Diffusion models with RKNN"
+---
+
+# Stable Diffusion (RKNN)
+
+import StableDiffusionConvert from "../../../../common/ai/\_stable_diffusion_convert.mdx";
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov5.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov5.md
new file mode 100644
index 000000000..febe9b641
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov5.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 7
+---
+
+# Deploy YOLOv5 on the Device
+
+import RKNNTOOLKITLITE2YOLOV5 from '../../../../common/dev/\_rknn-toolkit-lite2-yolov5.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov8.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov8.md
new file mode 100644
index 000000000..70ba715c1
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit-lite2-yolov8.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 8
+description: "Deploy YOLOv8 on-device with RK3588"
+---
+
+# Deploy YOLOv8 on the Device
+
+import RKNNTOOLKITLITE2YOLOV8 from '../../../../common/dev/\_rknn-toolkit-lite2-yolov8.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit2-pc.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit2-pc.md
new file mode 100644
index 000000000..9e8dfc490
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-toolkit2-pc.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 6
+description: "Run simulated inference with RKNN Toolkit2 on a PC"
+---
+
+# Simulated Inference for a YOLOv5 Segmentation Model
+
+import RKNNTOOLKIT2PC from '../../../../common/dev/\_rknn-toolkit2-pc.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-ultralytics.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-ultralytics.md
new file mode 100644
index 000000000..a806f35f0
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/rknn-ultralytics.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 9
+description: "Deploy Ultralytics YOLO models on RK3588/RK356X"
+---
+
+# RKNN Ultralytics YOLOv11
+
+import RKNNULTRALYTICS from '../../../../common/dev/\_rknn-ultralytics.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/wav2vec2.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/wav2vec2.md
new file mode 100644
index 000000000..f4ff4bae4
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/wav2vec2.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 27
+---
+
+import Wav2vec2 from "../../../../common/ai/rockchip/\_wav2vec2.mdx"
+
+# Wav2Vec 2.0
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/whisper.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/whisper.md
new file mode 100644
index 000000000..fecb5a30d
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/whisper.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 26
+---
+
+import Whisper from "../../../../common/ai/rockchip/\_whisper.mdx"
+
+# Whisper
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolo-world.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolo-world.md
new file mode 100644
index 000000000..65a56aab6
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolo-world.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 16
+---
+
+import YOLOWorld from "../../../../common/ai/rockchip/\_yolo_world.mdx"
+
+# YOLO World
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8-seg.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8-seg.md
new file mode 100644
index 000000000..02161f60e
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8-seg.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 17
+---
+
+import YOLOv8Seg from "../../../../common/ai/rockchip/\_yolov8_seg.mdx"
+
+# YOLOv8-Seg
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8.md
new file mode 100644
index 000000000..6f5bb28e3
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ai/yolov8.md
@@ -0,0 +1,9 @@
+---
+sidebar_position: 14
+---
+
+import YOLOv8 from "../../../../common/ai/rockchip/\_yolov8.mdx"
+
+# YOLOv8
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/gpiod.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/gpiod.md
index fd85fbff0..9107fb284 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/gpiod.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/gpiod.md
@@ -1,6 +1,6 @@
---
-sidebar_position: 2
-description: "Take a Deep Dive into GPIOD"
+sidebar_position: 1
+description: "Learn GPIOD in depth"
---
# GPIOD Usage
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/hdmi-rx.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/hdmi-rx.md
index a1ad31947..7125f520c 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/hdmi-rx.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/hdmi-rx.md
@@ -1,6 +1,6 @@
---
-sidebar_position: 4
-description: "Explore HDMI RX features and experience seamless access and processing of HD video and audio"
+sidebar_position: 3
+description: "Explore HDMI RX and handle high-quality video/audio input"
---
import HdmiRX from '../../../common/dev/\_hdmi-rx.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/moonlight.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/moonlight.md
index 15e19f291..7e80f03ca 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/moonlight.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/moonlight.md
@@ -1,5 +1,5 @@
---
-sidebar_position: 8
+sidebar_position: 6
---
import Moonlight from '../../../common/dev/\_moonlight.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/mraa.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/mraa.md
index 413b2da50..622a936e1 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/mraa.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/mraa.md
@@ -1,6 +1,6 @@
---
-sidebar_position: 3
-description: "Explore the Eclipse Mraa, a versatile I/O communication library for Linux"
+sidebar_position: 2
+description: "Explore Eclipse Mraa, a versatile I/O library for Linux"
---
# Mraa Usage
@@ -9,7 +9,7 @@ import MRAA from '../../../common/dev/\_mraa.mdx';
-
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/opencv.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/opencv.md
index 1ed4c1691..900fd9e9d 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/opencv.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/opencv.md
@@ -1,6 +1,6 @@
---
-sidebar_position: 5
-description: "Open the door to OpenCV computer vision that seamlessly connects images to the real world"
+sidebar_position: 4
+description: "Get started with OpenCV for computer vision"
---
# OpenCV
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt-cross-compile.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt-cross-compile.md
new file mode 100644
index 000000000..f1a7468f0
--- /dev/null
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt-cross-compile.md
@@ -0,0 +1,10 @@
+---
+sidebar_position: 5
+description: "Cross-compile Qt applications for ARM platforms"
+---
+
+# Qt Cross Compilation
+
+import QTCROSSCOMPILE from '../../../common/dev/\_qt-cross-compile.mdx';
+
+
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt_cross_compile.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt_cross_compile.md
deleted file mode 100644
index 785c4e882..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/qt_cross_compile.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 7
-description: "Enjoy the convenience and fun of developing efficient and user-friendly applications on the ARM platform with QT cross-compilation"
----
-
-# QT Cross Compile
-
-import QTCROSSCOMPILE from '../../../common/dev/\_qt-cross-compile.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/redroid.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/redroid.md
index 3aab3f0f0..07bf4e2a9 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/redroid.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/redroid.md
@@ -1,10 +1,10 @@
---
-sidebar_position: 21
-description: "Help you use Redroid cloud mirroring"
+sidebar_position: 9
+description: "Use the Redroid cloud-phone image"
---
import Redroid from '../../../common/dev/\_redroid.mdx';
-# USE Redroid
+# Using Redroid
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_deepseek_r1.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_deepseek_r1.md
deleted file mode 100644
index ff12ca8db..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_deepseek_r1.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-sidebar_position: 23
----
-
-# RKLLM DeepSeek-R1
-
-import RKLLMDEEPSEEKR1 from '../../../common/dev/\_rkllm-deepseek-r1.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_install.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_install.md
deleted file mode 100644
index 7d931c57d..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_install.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 13
-description: "Start the journey of efficient deployment of intelligent language models with RKLLM installation, bringing technology and human intelligence together"
----
-
-# RKLLM Installation
-
-import Rkllminstall from '../../../common/dev/\_rkllm-install.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_qwen2_vl.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_qwen2_vl.md
deleted file mode 100644
index 6afbe6ed3..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_qwen2_vl.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 24
-description: "Run the Qwen2_VL large language model using RKLLM"
----
-
-# RKLLM Qwen2-VL
-
-import RKLLMQWEN2VL from '../../../common/dev/\_rkllm_qwen2_vl.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_usage.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_usage.md
deleted file mode 100644
index 521a03afb..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rkllm_usage.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 14
-description: "Efficient hardware acceleration of large language models using RKLLM technology for a new chapter in intelligent dialogue"
----
-
-# RKLLM Usage and Deploy LLM
-
-import Rkllmusage from '../../../common/dev/\_rkllm-usage.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_custom_yolo.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_custom_yolo.md
deleted file mode 100644
index 8273aa7b9..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_custom_yolo.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 25
-description: "Convert Custom Trained YOLO Models"
----
-
-# Convert Custom Trained YOLO Models
-
-import RKNNCUSTOMYOLO from '../../../common/ai/\_rknn_custom_yolo.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_install.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_install.md
deleted file mode 100644
index 81652cbd0..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_install.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 9
-description: "Start the journey of efficient inference of AI models on Rockchip NPU through RKNN installation, and feel the perfect fusion of technology and humanities"
----
-
-# RKNN Installation
-
-import RKNNinstall from '../../../common/dev/\_rknn-install.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_quick_start.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_quick_start.md
deleted file mode 100644
index 3e8dd1978..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_quick_start.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-sidebar_position: 15
----
-
-# RKNN Quick Example
-
-import Rknntoolkit2 from '../../../common/dev/\_rknn-toolkit2.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit2_pc.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit2_pc.md
deleted file mode 100644
index 18e2a6f6d..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit2_pc.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 10
-description: "Explore simulated inference of AI models with the RKNN toolkit and experience the efficiency and precision of intelligent image segmentation"
----
-
-# Simulate YOLOv5 Segmentation Inference
-
-import RKNNTOOLKIT2PC from '../../../common/dev/\_rknn-toolkit2-pc.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov5.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov5.md
deleted file mode 100644
index 63ac0141e..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov5.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-sidebar_position: 11
----
-
-# Deploy YOLOv5 Object Detection on the Board
-
-import RKNNTOOLKITLITE2YOLOV5 from '../../../common/dev/\_rknn-toolkit-lite2-yolov5.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov8.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov8.md
deleted file mode 100644
index 989db3471..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_toolkit_lite2_yolov8.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 12
-description: "Deploying YOLOv8 on the RK3588 board side opens a new era of intelligent target detection, allowing technology and humanistic care to merge perfectly in accurate identification"
----
-
-# Deploy YOLOv8 Object Detection on the Board
-
-import RKNNTOOLKITLITE2YOLOV8 from '../../../common/dev/\_rknn-toolkit-lite2-yolov8.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_ultralytics.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_ultralytics.md
deleted file mode 100644
index f823a8a0a..000000000
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rknn_ultralytics.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-sidebar_position: 16
-description: "Deploying YOLOv11 on the RK3588/356X board side opens a new era of intelligent target detection, allowing technology and humanistic care to merge perfectly in accurate identification"
----
-
-# RKNN Ultralytics YOLOv11
-
-import RKNNULTRALYTICS from '../../../common/dev/\_rknn-ultralytics.mdx';
-
-
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2_humble.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2-humble.md
similarity index 67%
rename from i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2_humble.md
rename to i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2-humble.md
index dd96c5aca..a98c36e54 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2_humble.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/ros2-humble.md
@@ -1,8 +1,8 @@
---
-sidebar_position: 20
+sidebar_position: 8
---
-# Install ROS2 HUMBLE
+# Install ROS 2 Humble
import ROS2 from '../../../common/dev/\_ros2-humble-compilation.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rtsp.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rtsp.md
index db4a7ec70..33b3713f8 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rtsp.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/rtsp.md
@@ -1,6 +1,6 @@
---
-sidebar_position: 8
-description: "Dig deep into the RTSP push streaming capabilities of the ROCK 5 series and enjoy the perfect blend of technology and humanity that makes HD video streaming as smooth as poetry"
+sidebar_position: 7
+description: "Stream RTSP from the ROCK 5 series"
---
import Rtsp from '../../../common/dev/\_rtsp.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv_usage.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv-usage.md
similarity index 60%
rename from i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv_usage.md
rename to i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv-usage.md
index 0485109ca..0bafe3d57 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv_usage.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/venv-usage.md
@@ -1,8 +1,8 @@
---
-sidebar_position: 22
+sidebar_position: 10
---
-# Python Virtual Environment Usage
+# Python Virtual Environments
import VENVUSAGE from '../../../common/dev/\_venv_usage.mdx';
diff --git a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/vscode-remote-ssh.md b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/vscode-remote-ssh.md
index 51a052b46..6f7ff4b78 100644
--- a/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/vscode-remote-ssh.md
+++ b/i18n/en/docusaurus-plugin-content-docs/current/rock5/rock5b/app-development/vscode-remote-ssh.md
@@ -1,9 +1,9 @@
---
-sidebar_position: 50
+sidebar_position: 11
---
import VSCODEREMOTESSH from '../../../common/dev/\_vscode-remote-ssh.mdx';
-# VS Code Remote SSH Development
+# Develop with VS Code Remote SSH