From b0c4c850e63665c5358ea5a9bc90feefb8d0e41e Mon Sep 17 00:00:00 2001
From: Emiya173 <cno.101@qq.com>
Date: Fri, 6 Feb 2026 20:08:53 +0800
Subject: [PATCH] 1. fix opus encoding 2. generate compile_commands.json to
 support clangd 3. safety and style improvements

---
 CMakeLists.txt      |   1 +
 src/decodeAudio.cpp | 206 ++++++++++++++++++++++++++------------------
 2 files changed, 123 insertions(+), 84 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 881b639..d9bb78e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.15)
 project(ffmpegAddon)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # FFmpeg 自动检测
 set(FFMPEG_OUT_DIR "${CMAKE_SOURCE_DIR}/ffmpeg_out")
diff --git a/src/decodeAudio.cpp b/src/decodeAudio.cpp
index 4520f35..993f894 100644
--- a/src/decodeAudio.cpp
+++ b/src/decodeAudio.cpp
@@ -1,4 +1,5 @@
 #include "decodeAudio.h"
+#include "libavcodec/codec_id.h"
 #include <iostream>
 #include <map>
 
@@ -18,7 +19,7 @@ static const std::map<std::string, FormatConfig> FORMAT_CONFIGS = {
     {"wma", {"asf", AV_CODEC_ID_WMAV2, AV_SAMPLE_FMT_FLTP, 128000}},
     {"m4a", {"ipod", AV_CODEC_ID_AAC, AV_SAMPLE_FMT_FLTP, 128000}},
     {"spx", {"ogg", AV_CODEC_ID_SPEEX, AV_SAMPLE_FMT_S16, 24600}},
-    {"ogg", {"ogg", AV_CODEC_ID_VORBIS, AV_SAMPLE_FMT_FLTP, 128000}},
+    {"ogg", {"ogg", AV_CODEC_ID_OPUS, AV_SAMPLE_FMT_S16, 48000}},
     {"wav", {"wav", AV_CODEC_ID_PCM_S16LE, AV_SAMPLE_FMT_S16, 0}},
     {"flac", {"flac", AV_CODEC_ID_FLAC, AV_SAMPLE_FMT_S16, 0}}
 };
@@ -76,7 +77,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         }
 
         AVStream *input_stream = input_fmt_ctx->streams[audio_stream_index];
-        
+
         // 初始化解码器
         const AVCodec *decoder = avcodec_find_decoder(input_stream->codecpar->codec_id);
         if (!decoder)
@@ -85,8 +86,15 @@ class DecodeAudioToFmtWorker : public AsyncWorker
             SetError("Decoder not found");
             return;
         }
-        
+
         AVCodecContext *decoder_ctx = avcodec_alloc_context3(decoder);
+        if (!decoder_ctx)
+        {
+            avformat_close_input(&input_fmt_ctx);
+            SetError("Failed to allocate decoder context");
+            return;
+        }
+
         avcodec_parameters_to_context(decoder_ctx, input_stream->codecpar);
         if (avcodec_open2(decoder_ctx, decoder, nullptr) < 0)
         {
@@ -112,11 +120,11 @@ class DecodeAudioToFmtWorker : public AsyncWorker
             // 支持的采样率列表
             const int supported_rates[] = {48000, 44100, 32000, 24000, 16000, 12000, 8000};
             const int num_rates = sizeof(supported_rates) / sizeof(supported_rates[0]);
-            
+
             // 找到最接近的采样率
             int closest_rate = supported_rates[0];
             int min_diff = abs(src_sample_rate - supported_rates[0]);
-            
+
             for (int i = 1; i < num_rates; ++i)
             {
                 int diff = abs(src_sample_rate - supported_rates[i]);
@@ -126,7 +134,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                     closest_rate = supported_rates[i];
                 }
             }
-            
+
             out_sample_rate = closest_rate;
         }
 
@@ -135,6 +143,10 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         {
             out_sample_rate = 8000;
         }
+        else if(config.codec_id == AV_CODEC_ID_OPUS)
+        {
+            out_sample_rate = 48000;
+        }
 
         // 设置输出声道布局(单声道)
         AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO;
@@ -151,36 +163,42 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         }
 
         AVCodecContext *encoder_ctx = avcodec_alloc_context3(encoder);
+        if (!encoder_ctx)
+        {
+            avcodec_free_context(&decoder_ctx);
+            avformat_close_input(&input_fmt_ctx);
+            SetError("Failed to allocate encoder context");
+            return;
+        }
+
+        // 启用EXPERIMENTAL 编码器
+        encoder_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+
         encoder_ctx->sample_rate = out_sample_rate;
-        encoder_ctx->ch_layout = out_ch_layout;
-        
-        // 检查编码器支持的采样格式
+        av_channel_layout_copy(&encoder_ctx->ch_layout, &out_ch_layout);
+
+        // 【关键修复2】检查编码器支持的采样格式
+        enum AVSampleFormat target_sample_fmt = config.sample_fmt;
         if (encoder->sample_fmts)
         {
             const enum AVSampleFormat *p = encoder->sample_fmts;
             bool format_supported = false;
             while (*p != AV_SAMPLE_FMT_NONE)
             {
-                if (*p == config.sample_fmt)
+                if (*p == target_sample_fmt)
                 {
                     format_supported = true;
                     break;
                 }
                 p++;
             }
-            if (!format_supported && encoder->sample_fmts[0] != AV_SAMPLE_FMT_NONE)
-            {
-                encoder_ctx->sample_fmt = encoder->sample_fmts[0];
-            }
-            else
+            if (!format_supported)
             {
-                encoder_ctx->sample_fmt = config.sample_fmt;
+                // 使用编码器支持的第一个格式
+                target_sample_fmt = encoder->sample_fmts[0];
             }
         }
-        else
-        {
-            encoder_ctx->sample_fmt = config.sample_fmt;
-        }
+        encoder_ctx->sample_fmt = target_sample_fmt;
         
         if (config.bit_rate > 0)
         {
@@ -257,27 +275,19 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         }
 
         // 设置输入声道布局
-        AVChannelLayout tmp_ch_layout;
-        bool tmp_ch_layout_allocated = false;
-        const AVChannelLayout *in_ch_layout = &input_stream->codecpar->ch_layout;
-        if (in_ch_layout->nb_channels == 0)
-        {
-            av_channel_layout_default(&tmp_ch_layout, src_channels);
-            in_ch_layout = &tmp_ch_layout;
-            tmp_ch_layout_allocated = true;
-        }
+        AVChannelLayout src_ch_layout;
+        av_channel_layout_default(&src_ch_layout, src_channels);
 
-        // 初始化重采样器 - 使用编码器实际的采样格式
+        // 初始化重采样器
         SwrContext *swr_ctx = nullptr;
         if (swr_alloc_set_opts2(&swr_ctx,
                                 &out_ch_layout, encoder_ctx->sample_fmt, out_sample_rate,
-                                in_ch_layout, src_sample_fmt, src_sample_rate,
+                                &src_ch_layout, src_sample_fmt, src_sample_rate,
                                 0, nullptr) < 0 || !swr_ctx)
         {
+            av_channel_layout_uninit(&src_ch_layout);
             if (swr_ctx)
                 swr_free(&swr_ctx);
-            if (tmp_ch_layout_allocated)
-                av_channel_layout_uninit(&tmp_ch_layout);
             if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
                 avio_closep(&output_fmt_ctx->pb);
             avformat_free_context(output_fmt_ctx);
@@ -290,8 +300,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         if (swr_init(swr_ctx) < 0)
         {
             swr_free(&swr_ctx);
-            if (tmp_ch_layout_allocated)
-                av_channel_layout_uninit(&tmp_ch_layout);
+            av_channel_layout_uninit(&src_ch_layout);
             if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
                 avio_closep(&output_fmt_ctx->pb);
             avformat_free_context(output_fmt_ctx);
@@ -315,8 +324,8 @@ class DecodeAudioToFmtWorker : public AsyncWorker
             frame_size = 1152; // 默认帧大小
         }
 
-        // 创建FIFO缓冲区用于累积样本
-        AVAudioFifo *fifo = av_audio_fifo_alloc(encoder_ctx->sample_fmt, out_channels, frame_size * 2);
+        // 【关键修复3】创建FIFO缓冲区 - 使用实际的采样格式
+        AVAudioFifo *fifo = av_audio_fifo_alloc(encoder_ctx->sample_fmt, out_channels, frame_size * 4);
         if (!fifo)
         {
             av_packet_free(&output_packet);
@@ -324,8 +333,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker
             av_frame_free(&decoded_frame);
             av_packet_free(&input_packet);
             swr_free(&swr_ctx);
-            if (tmp_ch_layout_allocated)
-                av_channel_layout_uninit(&tmp_ch_layout);
+            av_channel_layout_uninit(&src_ch_layout);
             if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
                 avio_closep(&output_fmt_ctx->pb);
             avformat_free_context(output_fmt_ctx);
@@ -351,14 +359,16 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                             swr_get_delay(swr_ctx, src_sample_rate) + decoded_frame->nb_samples,
                             out_sample_rate, src_sample_rate, AV_ROUND_UP);
 
-                        // 分配重采样帧
+                        // 【关键修复4】正确分配重采样帧
+                        av_frame_unref(resampled_frame);
                         resampled_frame->format = encoder_ctx->sample_fmt;
-                        resampled_frame->ch_layout = out_ch_layout;
+                        av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout);
                         resampled_frame->sample_rate = out_sample_rate;
                         resampled_frame->nb_samples = dst_nb_samples;
 
                         if (av_frame_get_buffer(resampled_frame, 0) < 0)
                         {
+                            av_frame_unref(decoded_frame);
                             continue;
                         }
 
@@ -370,31 +380,40 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                         if (converted_samples > 0)
                         {
                             // 将重采样的数据写入FIFO
-                            av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples);
+                            if (av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples) < converted_samples)
+                            {
+                                av_frame_unref(decoded_frame);
+                                continue;
+                            }
 
                             // 当FIFO中有足够的样本时,编码帧
                             while (av_audio_fifo_size(fifo) >= frame_size)
                             {
                                 AVFrame *encode_frame = av_frame_alloc();
+                                if (!encode_frame)
+                                    break;
+
                                 encode_frame->format = encoder_ctx->sample_fmt;
-                                encode_frame->ch_layout = out_ch_layout;
+                                av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout);
                                 encode_frame->sample_rate = out_sample_rate;
                                 encode_frame->nb_samples = frame_size;
 
                                 if (av_frame_get_buffer(encode_frame, 0) >= 0)
                                 {
-                                    av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size);
-                                    encode_frame->pts = pts;
-                                    pts += frame_size;
-
-                                    if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
+                                    if (av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size) == frame_size)
                                     {
-                                        while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                                        encode_frame->pts = pts;
+                                        pts += frame_size;
+
+                                        if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
                                         {
-                                            output_packet->stream_index = 0;
-                                            av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
-                                            av_interleaved_write_frame(output_fmt_ctx, output_packet);
-                                            av_packet_unref(output_packet);
+                                            while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                                            {
+                                                output_packet->stream_index = 0;
+                                                av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
+                                                av_interleaved_write_frame(output_fmt_ctx, output_packet);
+                                                av_packet_unref(output_packet);
+                                            }
                                         }
                                     }
                                 }
@@ -402,7 +421,7 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                             }
                         }
 
-                        av_frame_unref(resampled_frame);
+                        av_frame_unref(decoded_frame);
                     }
                 }
             }
@@ -417,8 +436,9 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                 swr_get_delay(swr_ctx, src_sample_rate) + decoded_frame->nb_samples,
                 out_sample_rate, src_sample_rate, AV_ROUND_UP);
 
+            av_frame_unref(resampled_frame);
             resampled_frame->format = encoder_ctx->sample_fmt;
-            resampled_frame->ch_layout = out_ch_layout;
+            av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout);
             resampled_frame->sample_rate = out_sample_rate;
             resampled_frame->nb_samples = dst_nb_samples;
 
@@ -435,25 +455,30 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                     while (av_audio_fifo_size(fifo) >= frame_size)
                     {
                         AVFrame *encode_frame = av_frame_alloc();
+                        if (!encode_frame)
+                            break;
+
                         encode_frame->format = encoder_ctx->sample_fmt;
-                        encode_frame->ch_layout = out_ch_layout;
+                        av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout);
                         encode_frame->sample_rate = out_sample_rate;
                         encode_frame->nb_samples = frame_size;
 
                         if (av_frame_get_buffer(encode_frame, 0) >= 0)
                         {
-                            av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size);
-                            encode_frame->pts = pts;
-                            pts += frame_size;
-
-                            if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
+                            if (av_audio_fifo_read(fifo, (void **)encode_frame->data, frame_size) == frame_size)
                             {
-                                while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                                encode_frame->pts = pts;
+                                pts += frame_size;
+
+                                if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
                                 {
-                                    output_packet->stream_index = 0;
-                                    av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
-                                    av_interleaved_write_frame(output_fmt_ctx, output_packet);
-                                    av_packet_unref(output_packet);
+                                    while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                                    {
+                                        output_packet->stream_index = 0;
+                                        av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
+                                        av_interleaved_write_frame(output_fmt_ctx, output_packet);
+                                        av_packet_unref(output_packet);
+                                    }
                                 }
                             }
                         }
@@ -461,14 +486,23 @@ class DecodeAudioToFmtWorker : public AsyncWorker
                     }
                 }
             }
-            av_frame_unref(resampled_frame);
+            av_frame_unref(decoded_frame);
         }
 
         // 刷新重采样器中剩余的样本
-        int converted_samples = swr_convert(swr_ctx, resampled_frame->data, frame_size, nullptr, 0);
-        if (converted_samples > 0)
+        av_frame_unref(resampled_frame);
+        resampled_frame->format = encoder_ctx->sample_fmt;
+        av_channel_layout_copy(&resampled_frame->ch_layout, &out_ch_layout);
+        resampled_frame->sample_rate = out_sample_rate;
+        resampled_frame->nb_samples = frame_size;
+
+        if (av_frame_get_buffer(resampled_frame, 0) >= 0)
         {
-            av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples);
+            int converted_samples = swr_convert(swr_ctx, resampled_frame->data, frame_size, nullptr, 0);
+            if (converted_samples > 0)
+            {
+                av_audio_fifo_write(fifo, (void **)resampled_frame->data, converted_samples);
+            }
         }
 
         // 处理FIFO中剩余的样本
@@ -476,25 +510,30 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         {
             int remaining = av_audio_fifo_size(fifo);
             AVFrame *encode_frame = av_frame_alloc();
+            if (!encode_frame)
+                break;
+
             encode_frame->format = encoder_ctx->sample_fmt;
-            encode_frame->ch_layout = out_ch_layout;
+            av_channel_layout_copy(&encode_frame->ch_layout, &out_ch_layout);
             encode_frame->sample_rate = out_sample_rate;
             encode_frame->nb_samples = remaining;
 
             if (av_frame_get_buffer(encode_frame, 0) >= 0)
             {
-                av_audio_fifo_read(fifo, (void **)encode_frame->data, remaining);
-                encode_frame->pts = pts;
-                pts += remaining;
-
-                if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
+                if (av_audio_fifo_read(fifo, (void **)encode_frame->data, remaining) == remaining)
                 {
-                    while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                    encode_frame->pts = pts;
+                    pts += remaining;
+
+                    if (avcodec_send_frame(encoder_ctx, encode_frame) == 0)
                     {
-                        output_packet->stream_index = 0;
-                        av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
-                        av_interleaved_write_frame(output_fmt_ctx, output_packet);
-                        av_packet_unref(output_packet);
+                        while (avcodec_receive_packet(encoder_ctx, output_packet) == 0)
+                        {
+                            output_packet->stream_index = 0;
+                            av_packet_rescale_ts(output_packet, encoder_ctx->time_base, output_stream->time_base);
+                            av_interleaved_write_frame(output_fmt_ctx, output_packet);
+                            av_packet_unref(output_packet);
+                        }
                     }
                 }
             }
@@ -524,9 +563,8 @@ class DecodeAudioToFmtWorker : public AsyncWorker
         av_frame_free(&decoded_frame);
         av_packet_free(&input_packet);
         swr_free(&swr_ctx);
-        if (tmp_ch_layout_allocated)
-            av_channel_layout_uninit(&tmp_ch_layout);
-        
+        av_channel_layout_uninit(&src_ch_layout);
+
         if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
             avio_closep(&output_fmt_ctx->pb);
         avformat_free_context(output_fmt_ctx);