Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a04ef3e
feat: add Qwen2.5omni text modal processing
KKkai0315 Jan 22, 2026
c9333ab
add qwen2.5omni vision, audio modal
KKkai0315 Jan 23, 2026
e959822
fix: Enhance quantization modules. Introduced FixedActivationQDQ for …
chenghuaWang Jan 17, 2026
0672432
fix: Suppress deprecated comma-subscript warnings in CMake and remove…
chenghuaWang Jan 17, 2026
927f7eb
feat(qualcomm): Add installation targets for flatbuffers and MllmQNNB…
chenghuaWang Jan 19, 2026
d2e6b36
feat(qualcomm): Refactor Qwen3 model to integrate ConcatObserver for …
chenghuaWang Jan 19, 2026
48c259a
feat(cpu): Implement fill operations for various data types including…
chenghuaWang Jan 20, 2026
e976d11
feat(qnn): Enhance QNNBackend initialization with improved logging an…
chenghuaWang Jan 21, 2026
224d68e
feat(qnn): Update quantization handling and embedding output data typ…
chenghuaWang Jan 23, 2026
d2d5c09
feat(qwen3): Integrate QEmbedding for quantized embeddings and refine…
chenghuaWang Jan 23, 2026
c4f2306
fix
KKkai0315 Jan 23, 2026
a235a13
fix
KKkai0315 Jan 23, 2026
eeac11f
Merge remote-tracking branch 'refs/remotes/origin/main'
KKkai0315 Jan 23, 2026
adc3b64
add ConvTranspose1dOp & TanhOp
KKkai0315 Jan 24, 2026
674f97c
fix: fix Tanh op and add test for Tanh Op and ConvTranspose1d Op
KKkai0315 Jan 25, 2026
e1ba448
add minicpmo45
KKkai0315 Feb 23, 2026
8c0cda7
merge
KKkai0315 Feb 23, 2026
af574ae
add
KKkai0315 Feb 24, 2026
06b754c
add qwen2.5o talker
KKkai0315 Mar 5, 2026
5676edc
add
KKkai0315 Mar 5, 2026
4baacd3
Merge branch 'main' into main
oreomaker Mar 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_subdirectory(qwen2vl)
add_subdirectory(qwen2vl_tracer)
add_subdirectory(qwen2_5vl)
add_subdirectory(qwen2_5vl_tracer)
add_subdirectory(minicpm_o45)
add_subdirectory(llama)
add_subdirectory(minicpm_o)
add_subdirectory(minicpm4)
Expand Down
7 changes: 7 additions & 0 deletions examples/minicpm_o45/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Example runner for the MiniCPM-o 4.5 model.
# Links against the MLLM runtime and CPU backend; headers come from the
# project-wide include directory exported by the top-level build.
add_executable(mllm-minicpm-o45-runner main.cpp)
target_link_libraries(mllm-minicpm-o45-runner PRIVATE MllmRT MllmCPUBackend)
# Quote the expansion so a path containing spaces/semicolons stays one argument.
target_include_directories(mllm-minicpm-o45-runner PRIVATE "${MLLM_INCLUDE_DIR}")
285 changes: 285 additions & 0 deletions examples/minicpm_o45/config_minicpm_o45.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
{
"architectures": [
"MiniCPMO"
],
"version": "4.5",
"attention_bias": false,
"attention_dropout": 0.0,
"audio_chunk_length": 1.0,
"audio_config": {
"_attn_implementation_autoset": true,
"_name_or_path": "openai/whisper-medium",
"activation_dropout": 0.0,
"activation_function": "gelu",
"apply_spec_augment": false,
"architectures": [
"MiniCPMWhisperEncoder"
],
"attention_dropout": 0.0,
"begin_suppress_tokens": [
220,
50257
],
"bos_token_id": 50257,
"classifier_proj_size": 256,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 24,
"decoder_start_token_id": 50258,
"dropout": 0.0,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 24,
"eos_token_id": 50257,
"forced_decoder_ids": [
[
1,
50259
],
[
2,
50359
],
[
3,
50363
]
],
"init_std": 0.02,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"max_length": 448,
"max_source_positions": 1500,
"max_target_positions": 448,
"median_filter_width": 7,
"model_type": "whisper",
"num_hidden_layers": 24,
"num_mel_bins": 80,
"pad_token_id": 50257,
"scale_embedding": false,
"suppress_tokens": [
1,
2,
7,
8,
9,
10,
14,
25,
26,
27,
28,
29,
31,
58,
59,
60,
61,
62,
63,
90,
91,
92,
93,
359,
503,
522,
542,
873,
893,
902,
918,
922,
931,
1350,
1853,
1982,
2460,
2627,
3246,
3253,
3268,
3536,
3846,
3961,
4183,
4667,
6585,
6647,
7273,
9061,
9383,
10428,
10929,
11938,
12033,
12331,
12562,
13793,
14157,
14635,
15265,
15618,
16553,
16604,
18362,
18956,
20075,
21675,
22520,
26130,
26161,
26435,
28279,
29464,
31650,
32302,
32470,
36865,
42863,
47425,
49870,
50254,
50258,
50358,
50359,
50360,
50361,
50362
],
"torch_dtype": "float32",
"use_cache": true,
"use_weighted_layer_sum": false,
"vocab_size": 51865
},
"audio_pool_step": 5,
"auto_map": {
"AutoConfig": "configuration_minicpmo.MiniCPMOConfig",
"AutoModel": "modeling_minicpmo.MiniCPMO",
"AutoModelForCausalLM": "modeling_minicpmo.MiniCPMO"
},
"batch_vision_input": true,
"bos_token_id": 151643,
"drop_vision_last_layer": false,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"image_size": 448,
"init_audio": true,
"init_tts": true,
"init_vision": true,
"initializer_range": 0.02,
"intermediate_size": 12288,
"listen_speak_type": "asr",
"max_position_embeddings": 40960,
"max_window_layers": 36,
"model_type": "minicpmo",
"num_attention_heads": 32,
"num_hidden_layers": 36,
"num_key_value_heads": 8,
"patch_size": 14,
"query_num": 64,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"slice_config": {
"max_slice_nums": 1,
"model_type": "minicpmv",
"patch_size": 14,
"scale_resolution": 448
},
"slice_mode": true,
"sliding_window": null,
"stream_input": true,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.51.0",
"tts_config": {
"_attn_implementation_autoset": true,
"attention_type": "full_attention",
"attn_implementation": "sdpa",
"audio_bos_token_id": 151687,
"audio_tokenizer_sample_rate": 16000,
"audio_tokenizer_type": "s3tokenizer",
"aug_layer_loss_weight": false,
"aug_loss_weight": false,
"backbone_model": "llama",
"condition_type": "hidden_text_merge",
"cosyvoice_config_path": null,
"cosyvoice_model_dir": null,
"filter_tts_loss": false,
"hidden_act": "silu",
"hidden_size": 768,
"interleaved": false,
"intermediate_size": 3072,
"llm_dim": 4096,
"llm_dim_model_base": 256,
"llm_down_scale": false,
"llm_hidden_size": 4096,
"llm_intermediate_size": 768,
"long_weight": 0.1,
"max_position_embeddings": 4096,
"model_type": "minicpmtts",
"normalize_projected_hidden": true,
"num_attention_heads": 12,
"num_audio_tokens": 6562,
"num_hidden_layers": 20,
"num_key_value_heads": 12,
"num_mel_bins": 100,
"num_text_tokens": 152064,
"num_vq": 1,
"projector_type": "mlp",
"recomputed_chunks": 1,
"s3_stream_chunk_size": 25,
"s3_stream_generate": false,
"s3_stream_n_timesteps": 10,
"s3_stream_prelook_size": 3,
"short_weight": 0.1,
"streaming": false,
"streaming_audio_chunk_size": 50,
"streaming_sliding_window": false,
"streaming_sliding_window_audio_frame_rate": 50,
"streaming_sliding_window_audio_init_text_length": 10,
"streaming_sliding_window_audio_window_size": 300,
"streaming_sliding_window_average_speed": 5,
"streaming_sliding_window_fast_speed": 7,
"streaming_sliding_window_max_text_len": 500,
"streaming_sliding_window_slow_speed": 3,
"streaming_sliding_window_text_window_size": 50,
"streaming_text_chunk_max": 7,
"streaming_text_chunk_min": 3,
"streaming_text_reserved_len": 300,
"text_eos_token_id": 151692,
"tts_filter_loss_fix": false,
"use_llm_hidden_state": false,
"use_text": true,
"window_size": 2
},
"use_cache": true,
"use_image_id": true,
"use_sliding_window": false,
"vision_batch_size": 16,
"vision_config": {
"_attn_implementation_autoset": true,
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 980,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 27,
"patch_size": 14
},
"vocab_size": 151748
}
Loading
Loading