@@ -50,7 +50,7 @@ struct sense_voice_stream_params {
5050 int32_t n_threads = std::min(4 , (int32_t ) std::thread::hardware_concurrency());
5151 int32_t n_processors = 1 ;
5252 int32_t capture_id = -1 ;
53- int32_t chunk_size = 100 ; // ms
53+ int32_t chunk_size = 50 ; // ms
5454 int32_t max_nomute_chunks = 8000 / chunk_size;// chunks
5555 int32_t min_mute_chunks = 1000 / chunk_size; // chunks
5656
@@ -374,16 +374,16 @@ int main(int argc, char **argv) {
374374 int actual_chunk_size = n_sample_step;
375375 int vad_chunk_size = std::max (640 , actual_chunk_size);
376376 std::vector<float > vad_chunk (vad_chunk_size, 0 );
377-
377+
378378 int start_idx = i - idenitified_floats;
379-
379+
380380 // 确保不越界访问
381381 for (int j = 0 ; j < actual_chunk_size && start_idx + j < pcmf32.size (); j++) {
382382 if (start_idx + j >= 0 ) {
383- vad_chunk[j] = static_cast <float >(pcmf32[start_idx + j]) / 32768 . 0f ;
383+ vad_chunk[j] = static_cast <float >(pcmf32[start_idx + j]);
384384 }
385385 }
386-
386+
387387 // 如果实际chunk小于640,用最后一个样本值填充
388388 if (actual_chunk_size < 640 ) {
389389 float last_sample = (actual_chunk_size > 0 ) ? vad_chunk[actual_chunk_size - 1 ] : 0 .0f ;
@@ -396,9 +396,9 @@ int main(int argc, char **argv) {
396396 if (silero_vad_encode_internal (*ctx, *ctx->state , vad_chunk, params.n_threads , speech_prob)) {
397397 isnomute = (speech_prob >= params.speech_prob_threshold );
398398 // 调试信息:显示VAD结果
399- // if (speech_prob > 0.1 ) { // 只显示有意义的概率
400- // fprintf(stderr, "VAD: prob=%.3f, threshold=%.3f, isnomute=%d\n",
401- // speech_prob, params.threshold , isnomute);
399+ // if (i <= 256000 ) { // 只显示有意义的概率
400+ // fprintf(stderr, "VAD: prob=%.3f, threshold=%.3f, isnomute=%d, L_new_chunk=%d, R_new_chunk=%d, i=%d \n",
401+ // speech_prob, params.speech_prob_threshold , isnomute, L_new_chunk, R_new_chunk, i );
402402 // }
403403 } else {
404404 // 如果 VAD 处理失败,回退到vad_energy_zcr函数
0 commit comments