From b0c4c850e63665c5358ea5a9bc90feefb8d0e41e Mon Sep 17 00:00:00 2001 From: Emiya173 Date: Fri, 6 Feb 2026 20:08:53 +0800 Subject: [PATCH] 1. fix opus encode 2. generate compile_commands.json to support clangd 3. safety and style improvement --- CMakeLists.txt | 1 + src/decodeAudio.cpp | 206 ++++++++++++++++++++++++++------------------ 2 files changed, 123 insertions(+), 84 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 881b639..d9bb78e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.15) project(ffmpegAddon) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # FFmpeg 自动检测 set(FFMPEG_OUT_DIR "${CMAKE_SOURCE_DIR}/ffmpeg_out") diff --git a/src/decodeAudio.cpp b/src/decodeAudio.cpp index 4520f35..993f894 100644 --- a/src/decodeAudio.cpp +++ b/src/decodeAudio.cpp @@ -1,4 +1,5 @@ #include "decodeAudio.h" +#include "libavcodec/codec_id.h" #include #include @@ -18,7 +19,7 @@ static const std::map FORMAT_CONFIGS = { {"wma", {"asf", AV_CODEC_ID_WMAV2, AV_SAMPLE_FMT_FLTP, 128000}}, {"m4a", {"ipod", AV_CODEC_ID_AAC, AV_SAMPLE_FMT_FLTP, 128000}}, {"spx", {"ogg", AV_CODEC_ID_SPEEX, AV_SAMPLE_FMT_S16, 24600}}, - {"ogg", {"ogg", AV_CODEC_ID_VORBIS, AV_SAMPLE_FMT_FLTP, 128000}}, + {"ogg", {"ogg", AV_CODEC_ID_OPUS, AV_SAMPLE_FMT_S16, 48000}}, {"wav", {"wav", AV_CODEC_ID_PCM_S16LE, AV_SAMPLE_FMT_S16, 0}}, {"flac", {"flac", AV_CODEC_ID_FLAC, AV_SAMPLE_FMT_S16, 0}} }; @@ -76,7 +77,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker } AVStream *input_stream = input_fmt_ctx->streams[audio_stream_index]; - + // 初始化解码器 const AVCodec *decoder = avcodec_find_decoder(input_stream->codecpar->codec_id); if (!decoder) @@ -85,8 +86,15 @@ class DecodeAudioToFmtWorker : public AsyncWorker SetError("Decoder not found"); return; } - + AVCodecContext *decoder_ctx = avcodec_alloc_context3(decoder); + if (!decoder_ctx) + { + avformat_close_input(&input_fmt_ctx); + SetError("Failed to 
allocate decoder context"); + return; + } + avcodec_parameters_to_context(decoder_ctx, input_stream->codecpar); if (avcodec_open2(decoder_ctx, decoder, nullptr) < 0) { @@ -112,11 +120,11 @@ class DecodeAudioToFmtWorker : public AsyncWorker // 支持的采样率列表 const int supported_rates[] = {48000, 44100, 32000, 24000, 16000, 12000, 8000}; const int num_rates = sizeof(supported_rates) / sizeof(supported_rates[0]); - + // 找到最接近的采样率 int closest_rate = supported_rates[0]; int min_diff = abs(src_sample_rate - supported_rates[0]); - + for (int i = 1; i < num_rates; ++i) { int diff = abs(src_sample_rate - supported_rates[i]); @@ -126,7 +134,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker closest_rate = supported_rates[i]; } } - + out_sample_rate = closest_rate; } @@ -135,6 +143,10 @@ class DecodeAudioToFmtWorker : public AsyncWorker { out_sample_rate = 8000; } + else if(config.codec_id == AV_CODEC_ID_OPUS) + { + out_sample_rate = 48000; + } // 设置输出声道布局(单声道) AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO; @@ -151,36 +163,42 @@ class DecodeAudioToFmtWorker : public AsyncWorker } AVCodecContext *encoder_ctx = avcodec_alloc_context3(encoder); + if (!encoder_ctx) + { + avcodec_free_context(&decoder_ctx); + avformat_close_input(&input_fmt_ctx); + SetError("Failed to allocate encoder context"); + return; + } + + // 启用EXPERIMENTAL 编码器 + encoder_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + encoder_ctx->sample_rate = out_sample_rate; - encoder_ctx->ch_layout = out_ch_layout; - - // 检查编码器支持的采样格式 + av_channel_layout_copy(&encoder_ctx->ch_layout, &out_ch_layout); + + // 【关键修复2】检查编码器支持的采样格式 + enum AVSampleFormat target_sample_fmt = config.sample_fmt; if (encoder->sample_fmts) { const enum AVSampleFormat *p = encoder->sample_fmts; bool format_supported = false; while (*p != AV_SAMPLE_FMT_NONE) { - if (*p == config.sample_fmt) + if (*p == target_sample_fmt) { format_supported = true; break; } p++; } - if (!format_supported && encoder->sample_fmts[0] != 
AV_SAMPLE_FMT_NONE) - { - encoder_ctx->sample_fmt = encoder->sample_fmts[0]; - } - else + if (!format_supported) { - encoder_ctx->sample_fmt = config.sample_fmt; + // 使用编码器支持的第一个格式 + target_sample_fmt = encoder->sample_fmts[0]; } } - else - { - encoder_ctx->sample_fmt = config.sample_fmt; - } + encoder_ctx->sample_fmt = target_sample_fmt; if (config.bit_rate > 0) { @@ -257,27 +275,19 @@ class DecodeAudioToFmtWorker : public AsyncWorker } // 设置输入声道布局 - AVChannelLayout tmp_ch_layout; - bool tmp_ch_layout_allocated = false; - const AVChannelLayout *in_ch_layout = &input_stream->codecpar->ch_layout; - if (in_ch_layout->nb_channels == 0) - { - av_channel_layout_default(&tmp_ch_layout, src_channels); - in_ch_layout = &tmp_ch_layout; - tmp_ch_layout_allocated = true; - } + AVChannelLayout src_ch_layout; + av_channel_layout_default(&src_ch_layout, src_channels); - // 初始化重采样器 - 使用编码器实际的采样格式 + // 初始化重采样器 SwrContext *swr_ctx = nullptr; if (swr_alloc_set_opts2(&swr_ctx, &out_ch_layout, encoder_ctx->sample_fmt, out_sample_rate, - in_ch_layout, src_sample_fmt, src_sample_rate, + &src_ch_layout, src_sample_fmt, src_sample_rate, 0, nullptr) < 0 || !swr_ctx) { + av_channel_layout_uninit(&src_ch_layout); if (swr_ctx) swr_free(&swr_ctx); - if (tmp_ch_layout_allocated) - av_channel_layout_uninit(&tmp_ch_layout); if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) avio_closep(&output_fmt_ctx->pb); avformat_free_context(output_fmt_ctx); @@ -290,8 +300,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker if (swr_init(swr_ctx) < 0) { swr_free(&swr_ctx); - if (tmp_ch_layout_allocated) - av_channel_layout_uninit(&tmp_ch_layout); + av_channel_layout_uninit(&src_ch_layout); if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) avio_closep(&output_fmt_ctx->pb); avformat_free_context(output_fmt_ctx); @@ -315,8 +324,8 @@ class DecodeAudioToFmtWorker : public AsyncWorker frame_size = 1152; // 默认帧大小 } - // 创建FIFO缓冲区用于累积样本 - AVAudioFifo *fifo = av_audio_fifo_alloc(encoder_ctx->sample_fmt, 
out_channels, frame_size * 2); + // 【关键修复3】创建FIFO缓冲区 - 使用实际的采样格式 + AVAudioFifo *fifo = av_audio_fifo_alloc(encoder_ctx->sample_fmt, out_channels, frame_size * 4); if (!fifo) { av_packet_free(&output_packet); @@ -324,8 +333,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker av_frame_free(&decoded_frame); av_packet_free(&input_packet); swr_free(&swr_ctx); - if (tmp_ch_layout_allocated) - av_channel_layout_uninit(&tmp_ch_layout); + av_channel_layout_uninit(&src_ch_layout); if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) avio_closep(&output_fmt_ctx->pb); avformat_free_context(output_fmt_ctx); @@ -351,14 +359,16 @@ class DecodeAudioToFmtWorker : public AsyncWorker swr_get_delay(swr_ctx, src_sample_rate) + decoded_frame->nb_samples, out_sample_rate, src_sample_rate, AV_ROUND_UP); - // 分配重采样帧 + // 【关键修复4】正确分配重采样帧 + av_frame_unref(resampled_frame); resampled_frame->format = encoder_ctx->sample_fmt; - resampled_frame->ch_layout = out_ch_layout; + av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout); resampled_frame->sample_rate = out_sample_rate; resampled_frame->nb_samples = dst_nb_samples; if (av_frame_get_buffer(resampled_frame, 0) < 0) { + av_frame_unref(decoded_frame); continue; } @@ -370,31 +380,40 @@ class DecodeAudioToFmtWorker : public AsyncWorker if (converted_samples > 0) { // 将重采样的数据写入FIFO - av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples); + if (av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples) < converted_samples) + { + av_frame_unref(decoded_frame); + continue; + } // 当FIFO中有足够的样本时,编码帧 while (av_audio_fifo_size(fifo) >= frame_size) { AVFrame *encode_frame = av_frame_alloc(); + if (!encode_frame) + break; + encode_frame->format = encoder_ctx->sample_fmt; - encode_frame->ch_layout = out_ch_layout; + av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout); encode_frame->sample_rate = out_sample_rate; encode_frame->nb_samples = frame_size; if 
(av_frame_get_buffer(encode_frame, 0) >= 0) { - av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size); - encode_frame->pts = pts; - pts += frame_size; - - if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) + if (av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size) == frame_size) { - while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + encode_frame->pts = pts; + pts += frame_size; + + if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) { - output_packet->stream_index = 0; - av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); - av_interleaved_write_frame(output_fmt_ctx, output_packet); - av_packet_unref(output_packet); + while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + { + output_packet->stream_index = 0; + av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); + av_interleaved_write_frame(output_fmt_ctx, output_packet); + av_packet_unref(output_packet); + } } } } @@ -402,7 +421,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker } } - av_frame_unref(resampled_frame); + av_frame_unref(decoded_frame); } } } @@ -417,8 +436,9 @@ class DecodeAudioToFmtWorker : public AsyncWorker swr_get_delay(swr_ctx, src_sample_rate) + decoded_frame->nb_samples, out_sample_rate, src_sample_rate, AV_ROUND_UP); + av_frame_unref(resampled_frame); resampled_frame->format = encoder_ctx->sample_fmt; - resampled_frame->ch_layout = out_ch_layout; + av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout); resampled_frame->sample_rate = out_sample_rate; resampled_frame->nb_samples = dst_nb_samples; @@ -435,25 +455,30 @@ class DecodeAudioToFmtWorker : public AsyncWorker while (av_audio_fifo_size(fifo) >= frame_size) { AVFrame *encode_frame = av_frame_alloc(); + if (!encode_frame) + break; + encode_frame->format = encoder_ctx->sample_fmt; - encode_frame->ch_layout = out_ch_layout; + av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout); 
encode_frame->sample_rate = out_sample_rate; encode_frame->nb_samples = frame_size; if (av_frame_get_buffer(encode_frame, 0) >= 0) { - av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size); - encode_frame->pts = pts; - pts += frame_size; - - if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) + if (av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size) == frame_size) { - while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + encode_frame->pts = pts; + pts += frame_size; + + if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) { - output_packet->stream_index = 0; - av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); - av_interleaved_write_frame(output_fmt_ctx, output_packet); - av_packet_unref(output_packet); + while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + { + output_packet->stream_index = 0; + av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); + av_interleaved_write_frame(output_fmt_ctx, output_packet); + av_packet_unref(output_packet); + } } } } @@ -461,14 +486,23 @@ class DecodeAudioToFmtWorker : public AsyncWorker } } } - av_frame_unref(resampled_frame); + av_frame_unref(decoded_frame); } // 刷新重采样器中剩余的样本 - int converted_samples = swr_convert(swr_ctx, resampled_frame->data, frame_size, nullptr, 0); - if (converted_samples > 0) + av_frame_unref(resampled_frame); + resampled_frame->format = encoder_ctx->sample_fmt; + av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout); + resampled_frame->sample_rate = out_sample_rate; + resampled_frame->nb_samples = frame_size; + + if (av_frame_get_buffer(resampled_frame, 0) >= 0) { - av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples); + int converted_samples = swr_convert(swr_ctx, resampled_frame->data, frame_size, nullptr, 0); + if (converted_samples > 0) + { + av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples); + } } // 处理FIFO中剩余的样本 
@@ -476,25 +510,30 @@ class DecodeAudioToFmtWorker : public AsyncWorker { int remaining = av_audio_fifo_size(fifo); AVFrame *encode_frame = av_frame_alloc(); + if (!encode_frame) + break; + encode_frame->format = encoder_ctx->sample_fmt; - encode_frame->ch_layout = out_ch_layout; + av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout); encode_frame->sample_rate = out_sample_rate; encode_frame->nb_samples = remaining; if (av_frame_get_buffer(encode_frame, 0) >= 0) { - av_audio_fifo_read(fifo, (void **)encode_frame->data, remaining); - encode_frame->pts = pts; - pts += remaining; - - if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) + if (av_audio_fifo_read(fifo, (void **)encode_frame->data, remaining) == remaining) { - while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + encode_frame->pts = pts; + pts += remaining; + + if (avcodec_send_frame(encoder_ctx, encode_frame) == 0) { - output_packet->stream_index = 0; - av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); - av_interleaved_write_frame(output_fmt_ctx, output_packet); - av_packet_unref(output_packet); + while (avcodec_receive_packet(encoder_ctx, output_packet) == 0) + { + output_packet->stream_index = 0; + av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base); + av_interleaved_write_frame(output_fmt_ctx, output_packet); + av_packet_unref(output_packet); + } } } } @@ -524,9 +563,8 @@ class DecodeAudioToFmtWorker : public AsyncWorker av_frame_free(&decoded_frame); av_packet_free(&input_packet); swr_free(&swr_ctx); - if (tmp_ch_layout_allocated) - av_channel_layout_uninit(&tmp_ch_layout); - + av_channel_layout_uninit(&src_ch_layout); + if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) avio_closep(&output_fmt_ctx->pb); avformat_free_context(output_fmt_ctx);