Skip to content

Commit ec64e51

Browse files
Fix H264/H265(HEVC) not working in MP4 (#3)
* Fix H264/H265 (HEVC) not working in MP4. Notes: - ffmpeg's avcodec can't translate Annex B to avcC or hvcC for VAAPI output - the wrapper's DoAddTrack does not properly add extradata - IOPluginProps.h exposes pIOPropContainerList, but because of how the UI selects the container, the list only ever holds one item: the container that is currently selected. We use that to determine whether the output is MP4 or not. This was verified by printing the entire list and inspecting its values. - conversion and extradata are not needed for MOV, and in fact can delay playback. So: + new function for Annex B to avcC + new function for Annex B to hvcC + new code for injecting extradata into the first keyframe + new code to limit the changes to H264/HEVC + MP4
1 parent 2d61295 commit ec64e51

File tree

2 files changed

+255
-7
lines changed

2 files changed

+255
-7
lines changed

vaapi_encoder.cpp

Lines changed: 252 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,198 @@ static const uint8_t uuid_hevc_10[] = { 0x01, 0x6f, 0x34, 0x71, 0x31, 0x17, 0x42
2525
static const uint8_t uuid_av1_8[] = { 0x01, 0x6f, 0x34, 0x71, 0x31, 0x17, 0x42, 0x05, 0xbf, 0x55, 0x37, 0x1c, 0xb0, 0xac, 0x66, 0x23 };
2626
static const uint8_t uuid_av1_10[] = { 0x01, 0x6f, 0x34, 0x71, 0x31, 0x17, 0x42, 0x05, 0xbf, 0x55, 0x37, 0x1c, 0xb0, 0xac, 0x66, 0x24 };
2727

28+
// H264 MP4
//
// Builds an AVCDecoderConfigurationRecord (the payload of an MP4 "avcC" box)
// from H.264 parameter sets supplied as an Annex B byte stream.
//
// annexb_data / annexb_size : Annex B buffer holding at least one SPS (NAL
//                             type 7) and one PPS (NAL type 8). Both 3-byte
//                             (00 00 01) and 4-byte (00 00 00 01) start codes
//                             are accepted. NAL units of other types are ignored.
// out_avcc                  : receives the record on success; it is left
//                             untouched when the function returns false.
//
// Returns false when the input is null or shorter than 6 bytes, when no SPS or
// no PPS is found, when the first SPS is too short to carry the
// profile/compatibility/level bytes, or when a count or NAL size does not fit
// the fixed-width fields of the record (5-bit SPS count, 8-bit PPS count,
// 16-bit NAL length).
static bool ConvertAnnexBToAVCC(const uint8_t* annexb_data, size_t annexb_size,
                                std::vector<uint8_t>& out_avcc)
{
    if (!annexb_data || annexb_size < 6) return false;

    // Index of the next 00 00 01 pattern at or after `from`, or annexb_size if
    // there is none. A 4-byte start code is the same pattern preceded by one
    // extra zero byte, which is trimmed from the previous NAL below.
    const auto find_start_code = [annexb_data, annexb_size](size_t from) -> size_t {
        for (size_t i = from; i + 3 <= annexb_size; ++i) {
            if (annexb_data[i] == 0 && annexb_data[i + 1] == 0 && annexb_data[i + 2] == 1)
                return i;
        }
        return annexb_size;
    };

    std::vector<std::vector<uint8_t>> sps_list;
    std::vector<std::vector<uint8_t>> pps_list;

    for (size_t code = find_start_code(0); code < annexb_size; ) {
        const size_t nal_start = code + 3;
        const size_t next_code = find_start_code(nal_start);

        // A NAL unit never ends in 0x00, so trailing zeros are either the
        // leading zero of a 4-byte start code or stream padding.
        size_t nal_end = next_code;
        while (nal_end > nal_start && annexb_data[nal_end - 1] == 0)
            --nal_end;

        if (nal_end > nal_start) {
            const uint8_t nal_type = annexb_data[nal_start] & 0x1F;
            if (nal_type == 7)
                sps_list.emplace_back(annexb_data + nal_start, annexb_data + nal_end);
            else if (nal_type == 8)
                pps_list.emplace_back(annexb_data + nal_start, annexb_data + nal_end);
        }

        code = next_code;
    }

    if (sps_list.empty() || pps_list.empty()) return false;

    // Bytes 1..3 of the SPS are copied into the record header, so the first
    // SPS must hold at least the NAL header plus those three bytes.
    const std::vector<uint8_t>& first_sps = sps_list.front();
    if (first_sps.size() < 4) return false;

    // Reject anything that would be silently truncated by the record layout.
    if (sps_list.size() > 0x1F || pps_list.size() > 0xFF) return false;
    for (const auto& s : sps_list)
        if (s.size() > 0xFFFF) return false;
    for (const auto& p : pps_list)
        if (p.size() > 0xFFFF) return false;

    std::vector<uint8_t> record;

    // 16-bit big-endian length followed by the raw NAL unit.
    const auto append_parameter_set = [&record](const std::vector<uint8_t>& nal) {
        record.push_back(static_cast<uint8_t>((nal.size() >> 8) & 0xFF));
        record.push_back(static_cast<uint8_t>(nal.size() & 0xFF));
        record.insert(record.end(), nal.begin(), nal.end());
    };

    record.push_back(0x01);          // configurationVersion
    record.push_back(first_sps[1]);  // AVCProfileIndication
    record.push_back(first_sps[2]);  // profile_compatibility
    record.push_back(first_sps[3]);  // AVCLevelIndication
    record.push_back(0xFF);          // reserved (6 bits) + lengthSizeMinusOne = 3 (4-byte lengths)

    record.push_back(static_cast<uint8_t>(0xE0 | sps_list.size()));  // reserved (3 bits) + SPS count
    for (const auto& s : sps_list)
        append_parameter_set(s);

    record.push_back(static_cast<uint8_t>(pps_list.size()));         // PPS count
    for (const auto& p : pps_list)
        append_parameter_set(p);

    // Publish only once the record is complete.
    out_avcc.swap(record);
    return true;
}
98+
99+
// HEVC MP4
//
// Builds an HEVCDecoderConfigurationRecord (the payload of an MP4 "hvcC" box)
// from HEVC parameter sets supplied as an Annex B byte stream.
//
// annexb_data / annexb_size : Annex B buffer holding at least one VPS (NAL
//                             type 32), SPS (33) and PPS (34). Both 3-byte and
//                             4-byte start codes are accepted; other NAL types
//                             are ignored.
// out_hvcc                  : receives the record on success; it is left
//                             untouched when the function returns false.
// bitDepthLuma / bitDepthChroma :
//                             sample bit depth. Only the low 3 bits of
//                             (value - 8) are stored, so passing the full depth
//                             (8, 10, 12) or the already-reduced "minus 8"
//                             value (0, 2, 4) yields the same record.
// frameRateNum / frameRateDen :
//                             frame rate as a fraction; when either is 0 the
//                             avgFrameRate field is written as 0 (unspecified).
//
// Profile, tier, level, temporal nesting and chroma format are read from the
// first SPS after removing its emulation-prevention bytes.
//
// Returns false when the input is null or shorter than 6 bytes, when any of
// VPS/SPS/PPS is missing, when the first SPS is too short to contain its
// profile_tier_level header, or when a NAL count or size exceeds 16 bits.
static bool ConvertAnnexBToHVCC(const uint8_t* annexb_data, size_t annexb_size,
                                std::vector<uint8_t>& out_hvcc,
                                uint8_t bitDepthLuma, uint8_t bitDepthChroma,
                                uint32_t frameRateNum = 0, uint32_t frameRateDen = 0)
{
    if (!annexb_data || annexb_size < 6) return false;

    // Index of the next 00 00 01 pattern at or after `from`, or annexb_size if
    // there is none. A 4-byte start code is the same pattern preceded by one
    // extra zero byte, which is trimmed from the previous NAL below.
    const auto find_start_code = [annexb_data, annexb_size](size_t from) -> size_t {
        for (size_t i = from; i + 3 <= annexb_size; ++i) {
            if (annexb_data[i] == 0 && annexb_data[i + 1] == 0 && annexb_data[i + 2] == 1)
                return i;
        }
        return annexb_size;
    };

    std::vector<std::vector<uint8_t>> vps_list;
    std::vector<std::vector<uint8_t>> sps_list;
    std::vector<std::vector<uint8_t>> pps_list;

    for (size_t code = find_start_code(0); code < annexb_size; ) {
        const size_t nal_start = code + 3;
        const size_t next_code = find_start_code(nal_start);

        // A NAL unit never ends in 0x00, so trailing zeros are either the
        // leading zero of a 4-byte start code or stream padding.
        size_t nal_end = next_code;
        while (nal_end > nal_start && annexb_data[nal_end - 1] == 0)
            --nal_end;

        if (nal_end > nal_start) {
            const uint8_t nal_unit_type = (annexb_data[nal_start] >> 1) & 0x3F;
            switch (nal_unit_type) {
                case 32: vps_list.emplace_back(annexb_data + nal_start, annexb_data + nal_end); break; // VPS
                case 33: sps_list.emplace_back(annexb_data + nal_start, annexb_data + nal_end); break; // SPS
                case 34: pps_list.emplace_back(annexb_data + nal_start, annexb_data + nal_end); break; // PPS
                default: break;
            }
        }

        code = next_code;
    }

    if (vps_list.empty() || sps_list.empty() || pps_list.empty())
        return false;

    // numNalus and nalUnitLength are both 16-bit fields.
    for (const auto* list : { &vps_list, &sps_list, &pps_list }) {
        if (list->size() > 0xFFFF) return false;
        for (const auto& nal : *list)
            if (nal.size() > 0xFFFF) return false;
    }

    // Strip emulation-prevention bytes (the 0x03 in 00 00 03) from the first
    // SPS so that its fields sit at their nominal offsets.
    std::vector<uint8_t> rbsp;
    rbsp.reserve(sps_list.front().size());
    unsigned zero_run = 0;
    for (const uint8_t byte : sps_list.front()) {
        if (zero_run >= 2 && byte == 0x03) {
            zero_run = 0;
            continue;
        }
        rbsp.push_back(byte);
        zero_run = (byte == 0) ? zero_run + 1 : 0;
    }

    // Layout of the unescaped SPS:
    //   [0..1]  NAL unit header
    //   [2]     sps_video_parameter_set_id(4) | sps_max_sub_layers_minus1(3) | sps_temporal_id_nesting_flag(1)
    //   [3]     general_profile_space(2) | general_tier_flag(1) | general_profile_idc(5)
    //   [4..7]  general_profile_compatibility_flags
    //   [8..13] general_constraint_indicator_flags
    //   [14]    general_level_idc
    const size_t kGeneralPtlBegin = 3;
    const size_t kGeneralPtlEnd = 15;
    if (rbsp.size() < kGeneralPtlEnd) return false;

    const unsigned max_sub_layers_minus1 = (rbsp[2] >> 1) & 0x07;
    const unsigned temporal_id_nested = rbsp[2] & 0x01;
    const unsigned sub_layers = max_sub_layers_minus1 + 1;
    const unsigned num_temporal_layers = sub_layers > 7 ? 7 : sub_layers; // 3-bit field

    // Minimal MSB-first bit reader over the unescaped SPS, positioned right
    // after general_level_idc. Running off the end clears bits_ok.
    size_t bit_pos = kGeneralPtlEnd * 8;
    bool bits_ok = true;
    const auto read_bits = [&](unsigned count) -> uint32_t {
        uint32_t value = 0;
        for (unsigned k = 0; k < count; ++k) {
            const size_t byte_index = bit_pos >> 3;
            if (byte_index >= rbsp.size()) {
                bits_ok = false;
                return 0;
            }
            value = (value << 1) | ((rbsp[byte_index] >> (7 - (bit_pos & 7))) & 1u);
            ++bit_pos;
        }
        return value;
    };
    // Unsigned Exp-Golomb code, ue(v).
    const auto read_ue = [&]() -> uint32_t {
        unsigned leading_zeros = 0;
        while (bits_ok && read_bits(1) == 0) {
            if (++leading_zeros > 31) bits_ok = false;
        }
        if (!bits_ok) return 0;
        return (1u << leading_zeros) - 1 + read_bits(leading_zeros);
    };

    // Skip the sub-layer part of profile_tier_level to reach chroma_format_idc.
    bool sub_profile_present[8] = {};
    bool sub_level_present[8] = {};
    for (unsigned i = 0; i < max_sub_layers_minus1; ++i) {
        sub_profile_present[i] = read_bits(1) != 0;
        sub_level_present[i] = read_bits(1) != 0;
    }
    if (max_sub_layers_minus1 > 0) {
        for (unsigned i = max_sub_layers_minus1; i < 8; ++i)
            read_bits(2); // reserved_zero_2bits
    }
    for (unsigned i = 0; i < max_sub_layers_minus1; ++i) {
        if (sub_profile_present[i]) bit_pos += 88; // sub-layer profile fields
        if (sub_level_present[i]) bit_pos += 8;    // sub_layer_level_idc
    }
    read_ue(); // sps_seq_parameter_set_id
    uint32_t chroma_format_idc = read_ue();
    if (!bits_ok || chroma_format_idc > 3) {
        // NOTE(review): the SPS could not be parsed this far; fall back to
        // 4:2:0 rather than declaring the stream monochrome -- confirm this is
        // the right default for every encoder configuration.
        chroma_format_idc = 1;
    }

    // avgFrameRate is expressed in frames per 256 seconds; 0 means unspecified.
    uint16_t avg_frame_rate = 0;
    if (frameRateNum > 0 && frameRateDen > 0) {
        const uint64_t per_256s =
            (static_cast<uint64_t>(frameRateNum) * 256 + frameRateDen / 2) / frameRateDen;
        if (per_256s <= 0xFFFF)
            avg_frame_rate = static_cast<uint16_t>(per_256s);
    }

    std::vector<uint8_t> record;

    record.push_back(1); // configurationVersion

    // general profile space/tier/idc, compatibility flags, constraint flags
    // and level are stored byte-for-byte as they appear in the SPS.
    record.insert(record.end(), rbsp.begin() + kGeneralPtlBegin, rbsp.begin() + kGeneralPtlEnd);

    // reserved (4 bits) + min_spatial_segmentation_idc (12 bits)
    record.push_back(0xF0); // reserved (4 bits set to 1)
    record.push_back(0x00); // min_spatial_segmentation_idc = 0

    record.push_back(0xFC); // reserved + parallelismType = 0 (unknown)
    record.push_back(static_cast<uint8_t>(0xFC | (chroma_format_idc & 0x03))); // reserved + chromaFormatIdc

    record.push_back(static_cast<uint8_t>(0xF8 | ((bitDepthLuma - 8) & 0x07)));   // bitDepthLumaMinus8
    record.push_back(static_cast<uint8_t>(0xF8 | ((bitDepthChroma - 8) & 0x07))); // bitDepthChromaMinus8

    record.push_back(static_cast<uint8_t>((avg_frame_rate >> 8) & 0xFF));
    record.push_back(static_cast<uint8_t>(avg_frame_rate & 0xFF));

    // constantFrameRate = 1 (fixed framerate), numTemporalLayers and
    // temporalIdNested from the SPS, lengthSizeMinusOne = 3
    record.push_back(static_cast<uint8_t>((1 << 6) | (num_temporal_layers << 3) |
                                          (temporal_id_nested << 2) | 3));

    record.push_back(3); // numOfArrays (VPS + SPS + PPS)

    const auto append_array = [&record](uint8_t nal_unit_type,
                                        const std::vector<std::vector<uint8_t>>& nals) {
        record.push_back(static_cast<uint8_t>(0x80 | nal_unit_type)); // array_completeness + NAL unit type
        record.push_back(static_cast<uint8_t>((nals.size() >> 8) & 0xFF)); // numNalus (16 bits)
        record.push_back(static_cast<uint8_t>(nals.size() & 0xFF));
        for (const auto& nal : nals) {
            record.push_back(static_cast<uint8_t>((nal.size() >> 8) & 0xFF)); // nalUnitLength (16 bits)
            record.push_back(static_cast<uint8_t>(nal.size() & 0xFF));
            record.insert(record.end(), nal.begin(), nal.end());
        }
    };

    append_array(32, vps_list); // VPS
    append_array(33, sps_list); // SPS
    append_array(34, pps_list); // PPS

    // Publish only once the record is complete.
    out_hvcc.swap(record);
    return true;
}
218+
219+
28220
class UISettingsController
29221
{
30222
public:
@@ -380,6 +572,13 @@ StatusCode VAAPIEncoder::DoOpen(HostBufferRef *p_pBuff)
380572

381573
UISettingsController settings(m_CommonProps);
382574
settings.Load(p_pBuff);
575+
std::string container;
576+
if (p_pBuff->GetString(pIOPropContainerList, container)) {
577+
g_Log(logLevelInfo, "✅ Selected container: %s\n", container.c_str());
578+
m_containerFormat = container;
579+
} else {
580+
g_Log(logLevelError, "❌ Failed to retrieve container from pIOPropContainerList\n");
581+
}
383582

384583
int16_t primaries = 0;
385584
if (!p_pBuff->GetINT16(pIOPropColorPrimaries, primaries))
@@ -463,8 +662,43 @@ StatusCode VAAPIEncoder::DoOpen(HostBufferRef *p_pBuff)
463662
return errFail;
464663
}
465664

466-
if (m_codec->extradata_size)
467-
p_pBuff->SetProperty(pIOPropMagicCookie, propTypeUInt8, m_codec->extradata, m_codec->extradata_size);
665+
if (m_codec->extradata_size) {
666+
if (m_containerFormat == "mp4") {
667+
if (!strcmp(m_name, "h264_vaapi")) {
668+
std::vector<uint8_t> avcc;
669+
if (ConvertAnnexBToAVCC(m_codec->extradata, m_codec->extradata_size, avcc)) {
670+
p_pBuff->SetProperty(pIOPropMagicCookie, propTypeUInt8, avcc.data(), static_cast<int>(avcc.size()));
671+
m_configExtradata = std::move(avcc);
672+
m_sentFirstPacket = false;
673+
} else {
674+
g_Log(logLevelError, "VAAPI :: Failed to convert H.264 extradata to AVCC");
675+
}
676+
} else if (!strcmp(m_name, "hevc_vaapi")) {
677+
std::vector<uint8_t> hvcc;
678+
uint8_t bitDepthLumaMinus8 = m_depth - 8;
679+
uint8_t bitDepthChromaMinus8 = m_depth - 8;
680+
uint32_t avgFrameRateNum = m_CommonProps.GetFrameRateNum();
681+
uint32_t avgFrameRateDen = m_CommonProps.GetFrameRateDen();
682+
683+
if (ConvertAnnexBToHVCC(m_codec->extradata, m_codec->extradata_size, hvcc,
684+
bitDepthLumaMinus8, bitDepthChromaMinus8,
685+
avgFrameRateNum, avgFrameRateDen))
686+
{
687+
p_pBuff->SetProperty(pIOPropMagicCookie, propTypeUInt8, hvcc.data(), static_cast<int>(hvcc.size()));
688+
m_configExtradata = std::move(hvcc);
689+
m_sentFirstPacket = false;
690+
} else {
691+
g_Log(logLevelError, "VAAPI :: Failed to convert HEVC extradata to hvcC");
692+
}
693+
} else {
694+
// AV1 MP4
695+
p_pBuff->SetProperty(pIOPropMagicCookie, propTypeUInt8, m_codec->extradata, m_codec->extradata_size);
696+
}
697+
} else {
698+
// MOV
699+
p_pBuff->SetProperty(pIOPropMagicCookie, propTypeUInt8, m_codec->extradata, m_codec->extradata_size);
700+
}
701+
}
468702

469703
uint8_t multiPass = 0;
470704
p_pBuff->SetProperty(pIOPropMultiPass, propTypeUInt8, &multiPass, 1);
@@ -588,17 +822,28 @@ StatusCode VAAPIEncoder::ReceiveData()
588822
if (!outBuf.IsValid() || !outBuf.Resize(pkt->size))
589823
return errAlloc;
590824

825+
uint8_t isKeyFrame = pkt->flags & AV_PKT_FLAG_KEY;
826+
591827
char *buf = nullptr;
592828
size_t bufSize = 0;
593-
if (!outBuf.LockBuffer(&buf, &bufSize))
594-
return errAlloc;
829+
if (!m_sentFirstPacket && isKeyFrame && !m_configExtradata.empty() && m_containerFormat == "mp4") {
830+
size_t totalSize = m_configExtradata.size() + pkt->size;
831+
if (!outBuf.Resize(totalSize)) return errAlloc;
832+
833+
if (!outBuf.LockBuffer(&buf, &bufSize)) return errAlloc;
595834

596-
memcpy(buf, pkt->data, pkt->size);
835+
memcpy(buf, m_configExtradata.data(), m_configExtradata.size());
836+
memcpy(buf + m_configExtradata.size(), pkt->data, pkt->size);
837+
838+
m_sentFirstPacket = true;
839+
g_Log(logLevelInfo, "VAAPI :: Injected extradata into first keyframe");
840+
} else {
841+
if (!outBuf.LockBuffer(&buf, &bufSize)) return errAlloc;
842+
memcpy(buf, pkt->data, pkt->size);
843+
}
597844

598845
outBuf.SetProperty(pIOPropPTS, propTypeInt64, &pkt->pts, 1);
599846
outBuf.SetProperty(pIOPropDTS, propTypeInt64, &pkt->dts, 1);
600-
601-
uint8_t isKeyFrame = pkt->flags & AV_PKT_FLAG_KEY;
602847
outBuf.SetProperty(pIOPropIsKeyFrame, propTypeUInt8, &isKeyFrame, 1);
603848

604849
av_packet_unref(pkt);

vaapi_encoder.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,7 @@ class VAAPIEncoder : public IPluginCodecRef
4444
int m_ColorModel;
4545
std::unique_ptr<UISettingsController> m_pSettings;
4646
HostCodecConfigCommon m_CommonProps;
47+
std::vector<uint8_t> m_configExtradata;
48+
bool m_sentFirstPacket = false;
49+
std::string m_containerFormat;
4750
};

0 commit comments

Comments
 (0)