From 33257883d0b5739aa4b030442fd9d06cf5895c99 Mon Sep 17 00:00:00 2001 From: Ivy233 Date: Thu, 25 Sep 2025 15:36:43 +0800 Subject: [PATCH] =?UTF-8?q?bugfix=EF=BC=9Astream=E5=9C=A8--use-vad?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E9=80=81=E5=85=A5=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E7=9A=84=E6=95=B0=E6=8D=AE=E5=A4=9A=E9=99=A4=E4=BA=86?= =?UTF-8?q?=E4=B8=80=E4=B8=AA32768=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/stream/stream.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/stream/stream.cc b/examples/stream/stream.cc index 63588a3..09ac86a 100644 --- a/examples/stream/stream.cc +++ b/examples/stream/stream.cc @@ -50,7 +50,7 @@ struct sense_voice_stream_params { int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_processors = 1; int32_t capture_id = -1; - int32_t chunk_size = 100; // ms + int32_t chunk_size = 50; // ms int32_t max_nomute_chunks = 8000 / chunk_size;// chunks int32_t min_mute_chunks = 1000 / chunk_size; // chunks @@ -374,16 +374,16 @@ int main(int argc, char **argv) { int actual_chunk_size = n_sample_step; int vad_chunk_size = std::max(640, actual_chunk_size); std::vector vad_chunk(vad_chunk_size, 0); - + int start_idx = i - idenitified_floats; - + // 确保不越界访问 for (int j = 0; j < actual_chunk_size && start_idx + j < pcmf32.size(); j++) { if (start_idx + j >= 0) { - vad_chunk[j] = static_cast(pcmf32[start_idx + j]) / 32768.0f; + vad_chunk[j] = static_cast(pcmf32[start_idx + j]); } } - + // 如果实际chunk小于640,用最后一个样本值填充 if (actual_chunk_size < 640) { float last_sample = (actual_chunk_size > 0) ? vad_chunk[actual_chunk_size - 1] : 0.0f; @@ -396,9 +396,9 @@ int main(int argc, char **argv) { if (silero_vad_encode_internal(*ctx, *ctx->state, vad_chunk, params.n_threads, speech_prob)) { isnomute = (speech_prob >= params.speech_prob_threshold); // 调试信息:显示VAD结果 - // if (speech_prob > 0.1) { // 只显示有意义的概率 - // fprintf(stderr, "VAD: prob=%.3f, threshold=%.3f, isnomute=%d\n", - // speech_prob, params.threshold, isnomute); + // if (i <= 256000) { // 只显示有意义的概率 + // fprintf(stderr, "VAD: prob=%.3f, threshold=%.3f, isnomute=%d, L_new_chunk=%d, R_new_chunk=%d, i=%d\n", + // speech_prob, params.speech_prob_threshold, isnomute, L_new_chunk, R_new_chunk, i); // } } else { // 如果 VAD 处理失败,回退到vad_energy_zcr函数