diff --git a/api/crypto/frame_crypto_transformer.cc b/api/crypto/frame_crypto_transformer.cc index 88c394ef10..d96b3152a4 100644 --- a/api/crypto/frame_crypto_transformer.cc +++ b/api/crypto/frame_crypto_transformer.cc @@ -32,6 +32,7 @@ #include "absl/types/variant.h" #include "api/array_view.h" #include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" #include "modules/rtp_rtcp/source/rtp_format_h264.h" #include "rtc_base/byte_buffer.h" #include "rtc_base/logging.h" @@ -96,6 +97,40 @@ inline bool FrameIsH264(webrtc::TransformableFrameInterface* frame, } } +inline bool FrameIsH265(webrtc::TransformableFrameInterface* frame, + webrtc::FrameCryptorTransformer::MediaType type) { + switch (type) { + case webrtc::FrameCryptorTransformer::MediaType::kVideoFrame: { + auto videoFrame = + static_cast(frame); + return videoFrame->header().codec == + webrtc::VideoCodecType::kVideoCodecH265; + } + default: + return false; + } +} + +inline bool IsH265SliceNalu(webrtc::H265::NaluType nalu_type) { + // VCL NALUs (Video Coding Layer) - slice segments + return nalu_type == webrtc::H265::NaluType::kTrailN || + nalu_type == webrtc::H265::NaluType::kTrailR || + nalu_type == webrtc::H265::NaluType::kTsaN || + nalu_type == webrtc::H265::NaluType::kTsaR || + nalu_type == webrtc::H265::NaluType::kStsaN || + nalu_type == webrtc::H265::NaluType::kStsaR || + nalu_type == webrtc::H265::NaluType::kRadlN || + nalu_type == webrtc::H265::NaluType::kRadlR || + nalu_type == webrtc::H265::NaluType::kRaslN || + nalu_type == webrtc::H265::NaluType::kRaslR || + nalu_type == webrtc::H265::NaluType::kBlaWLp || + nalu_type == webrtc::H265::NaluType::kBlaWRadl || + nalu_type == webrtc::H265::NaluType::kBlaNLp || + nalu_type == webrtc::H265::NaluType::kIdrWRadl || + nalu_type == webrtc::H265::NaluType::kIdrNLp || + nalu_type == webrtc::H265::NaluType::kCra; +} + inline bool NeedsRbspUnescaping(const uint8_t* frameData, size_t frameSize) { for (size_t i = 0; i < frameSize - 3; 
++i) { if (frameData[i] == 0 && frameData[i + 1] == 0 && frameData[i + 2] == 3) @@ -163,6 +198,27 @@ uint8_t get_unencrypted_bytes(webrtc::TransformableFrameInterface* frame, break; } } + } else if (videoFrame->header().codec == + webrtc::VideoCodecType::kVideoCodecH265) { + rtc::ArrayView data_in = frame->GetData(); + std::vector nalu_indices = + webrtc::H265::FindNaluIndices(data_in); + + int idx = 0; + for (const auto& index : nalu_indices) { + const uint8_t* slice = data_in.data() + index.payload_start_offset; + webrtc::H265::NaluType nalu_type = + webrtc::H265::ParseNaluType(slice[0]); + if (IsH265SliceNalu(nalu_type)) { + // H.265 has a 2-byte NALU header, so unencrypted bytes = offset + header size + unencrypted_bytes = index.payload_start_offset + webrtc::H265::kNaluHeaderSize; + RTC_LOG(LS_INFO) + << "H265 NonParameterSetNalu::payload_size: " << index.payload_size + << ", nalu_type " << static_cast(nalu_type) << ", NaluIndex [" << idx++ + << "] offset: " << index.payload_start_offset << ", unencrypted_bytes: " << unencrypted_bytes; + return unencrypted_bytes; + } + } } break; } @@ -413,6 +469,9 @@ void FrameCryptorTransformer::encryptFrame( if (FrameIsH264(frame.get(), type_)) { H264::WriteRbsp(data_without_header.data(), data_without_header.size(), &data_out); + } else if (FrameIsH265(frame.get(), type_)) { + H265::WriteRbsp(data_without_header.data(), data_without_header.size(), + &data_out); } else { data_out.AppendData(data_without_header); RTC_CHECK_EQ(data_out.size(), frame_header.size() + @@ -561,6 +620,10 @@ void FrameCryptorTransformer::decryptFrame( NeedsRbspUnescaping(encrypted_buffer.data(), encrypted_buffer.size())) { encrypted_buffer.SetData( H264::ParseRbsp(encrypted_buffer.data(), encrypted_buffer.size())); + } else if (FrameIsH265(frame.get(), type_) && + NeedsRbspUnescaping(encrypted_buffer.data(), encrypted_buffer.size())) { + encrypted_buffer.SetData( + H265::ParseRbsp(encrypted_buffer.data(), encrypted_buffer.size())); } rtc::Buffer 
encrypted_payload(encrypted_buffer.size() - ivLength - 2); diff --git a/common_video/h265/h265_common.h b/common_video/h265/h265_common.h index 7bba7f84a7..745b5ea5a1 100644 --- a/common_video/h265/h265_common.h +++ b/common_video/h265/h265_common.h @@ -43,6 +43,8 @@ enum NaluType : uint8_t { kStsaR = 5, kRadlN = 6, kRadlR = 7, + kRaslN = 8, + kRaslR = 9, kBlaWLp = 16, kBlaWRadl = 17, kBlaNLp = 18, diff --git a/sdk/BUILD.gn b/sdk/BUILD.gn index a710d600d6..152fbce7c7 100644 --- a/sdk/BUILD.gn +++ b/sdk/BUILD.gn @@ -759,6 +759,16 @@ if (is_ios || is_mac) { ] } + if (rtc_use_h265) { + sources += [ + "objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h", + "objc/components/video_codec/RTCCodecSpecificInfoH265.h", + "objc/components/video_codec/RTCCodecSpecificInfoH265.mm", + "objc/components/video_codec/RTCH265ProfileLevelId.h", + "objc/components/video_codec/RTCH265ProfileLevelId.mm", + ] + } + public_configs = [ ":common_config_objc" ] deps = [ ":base_objc", @@ -1516,6 +1526,15 @@ if (is_ios || is_mac) { "objc/components/audio/RTCAudioProcessingConfig.h", ] + if (rtc_use_h265) { + common_objc_headers += [ + "objc/components/video_codec/RTCCodecSpecificInfoH265.h", + "objc/components/video_codec/RTCH265ProfileLevelId.h", + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.h", + ] + } + if (target_environment != "xrdevice" && target_environment != "xrsimulator") { common_objc_headers += [ "objc/helpers/RTCCameraPreviewView.h", @@ -1919,6 +1938,17 @@ if (is_ios || is_mac) { "objc/components/video_codec/RTCVideoEncoderH264.mm", ] + if (rtc_use_h265) { + sources += [ + "objc/components/video_codec/RTCVideoDecoderH265.h", + "objc/components/video_codec/RTCVideoDecoderH265.mm", + "objc/components/video_codec/RTCVideoEncoderH265.h", + "objc/components/video_codec/RTCVideoEncoderH265.mm", + "objc/components/video_codec/RTCVideoFrameReorderQueue.h", + "objc/components/video_codec/RTCVideoFrameReorderQueue.mm", + 
] + } + configs += [ "..:common_objc", ":used_from_extension", diff --git a/sdk/objc/components/capturer/RTCCameraVideoCapturer.m b/sdk/objc/components/capturer/RTCCameraVideoCapturer.m index 55141ac9d0..fb5dd63492 100644 --- a/sdk/objc/components/capturer/RTCCameraVideoCapturer.m +++ b/sdk/objc/components/capturer/RTCCameraVideoCapturer.m @@ -303,7 +303,7 @@ - (void)stopCaptureWithCompletionHandler: #if TARGET_WATCH_DEVICE_ROTATION - (void)deviceOrientationDidChange:(NSNotification *)notification { [RTC_OBJC_TYPE(RTCDispatcher) - dispatchAsyncOnType:RTC_OBJC_TYPE(RTCDispatcherTypeCaptureSession) + dispatchAsyncOnType:RTC_OBJC_TYPE(RTCDispatcherTypeMain) block:^{ [self updateOrientation]; }]; diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h new file mode 100644 index 0000000000..24070cd7f2 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265+Private.h @@ -0,0 +1,25 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +/* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264+Private.h */ + +#import "RTCCodecSpecificInfoH265.h" + +#include "modules/video_coding/include/video_codec_interface.h" + +NS_ASSUME_NONNULL_BEGIN + +/* Interfaces for converting to/from internal C++ formats. 
*/ +@interface RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) () + +- (webrtc::CodecSpecificInfo)nativeCodecSpecificInfo; + +@end + +NS_ASSUME_NONNULL_END \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h new file mode 100644 index 0000000000..38f0bce31c --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.h @@ -0,0 +1,28 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +/* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264.h. */ + +#import + +#import "RTCCodecSpecificInfo.h" +#import "RTCMacros.h" + +/** Class for H265 specific config. */ +typedef NS_ENUM(NSUInteger, RTCH265PacketizationMode) { + RTCH265PacketizationModeNonInterleaved = 0, // Mode 1 - STAP-A, FU-A is allowed + RTCH265PacketizationModeSingleNalUnit // Mode 0 - only single NALU allowed +}; + +RTC_OBJC_EXPORT +@interface RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) : NSObject + +@property(nonatomic, assign) RTCH265PacketizationMode packetizationMode; + +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm new file mode 100644 index 0000000000..f5509c12f3 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCCodecSpecificInfoH265.mm @@ -0,0 +1,28 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + /* This file is borrowed from sdk/objc/components/video_codec/RTCCodecSpecificInfoH264.mm */ + +#import "RTCCodecSpecificInfoH265+Private.h" + +// H265 specific settings. +@implementation RTC_OBJC_TYPE (RTCCodecSpecificInfoH265) + +@synthesize packetizationMode = _packetizationMode; + +- (webrtc::CodecSpecificInfo)nativeCodecSpecificInfo { + webrtc::CodecSpecificInfo codecSpecificInfo; + codecSpecificInfo.codecType = webrtc::kVideoCodecH265; + codecSpecificInfo.codecSpecific.H264.packetization_mode = + (webrtc::H264PacketizationMode)_packetizationMode; + + return codecSpecificInfo; +} + +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m b/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m index 9c1943565a..60e0bcddf9 100644 --- a/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m +++ b/sdk/objc/components/video_codec/RTCDefaultVideoDecoderFactory.m @@ -15,6 +15,8 @@ #import "api/video_codec/RTCVideoCodecConstants.h" #import "api/video_codec/RTCVideoDecoderVP8.h" #import "api/video_codec/RTCVideoDecoderVP9.h" +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoDecoderH265.h" #import "base/RTCVideoCodecInfo.h" #if defined(RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY) @@ -42,6 +44,9 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:RTC_CONSTANT_TYPE(RTCVideoCodecH264Name) parameters:constrainedBaselineParams]; + RTC_OBJC_TYPE(RTCVideoCodecInfo) *h265Info = + [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]; + RTC_OBJC_TYPE(RTCVideoCodecInfo) *vp8Info = [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithName:RTC_CONSTANT_TYPE(RTCVideoCodecVp8Name)]; @@ -49,6 +54,7 @@ @implementation 
RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) constrainedHighInfo, constrainedBaselineInfo, vp8Info, + h265Info, ] mutableCopy]; if ([RTC_OBJC_TYPE(RTCVideoDecoderVP9) isSupported]) { @@ -68,6 +74,8 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoDecoderFactory) return [[RTC_OBJC_TYPE(RTCVideoDecoderH264) alloc] init]; } else if ([info.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecVp8Name)]) { return [RTC_OBJC_TYPE(RTCVideoDecoderVP8) vp8Decoder]; + } else if ([info.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]) { + return [[RTC_OBJC_TYPE(RTCVideoDecoderH265) alloc] init]; } else if ([info.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecVp9Name)] && [RTC_OBJC_TYPE(RTCVideoDecoderVP9) isSupported]) { return [RTC_OBJC_TYPE(RTCVideoDecoderVP9) vp9Decoder]; diff --git a/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m b/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m index df593f2311..1f3b37b77f 100644 --- a/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m +++ b/sdk/objc/components/video_codec/RTCDefaultVideoEncoderFactory.m @@ -15,6 +15,8 @@ #import "api/video_codec/RTCVideoCodecConstants.h" #import "api/video_codec/RTCVideoEncoderVP8.h" #import "api/video_codec/RTCVideoEncoderVP9.h" +#import "RTCH265ProfileLevelId.h" +#import "RTCVideoEncoderH265.h" #import "base/RTCVideoCodecInfo.h" #if defined(RTC_USE_LIBAOM_AV1_ENCODER) @@ -75,6 +77,10 @@ @implementation RTC_OBJC_TYPE (RTCDefaultVideoEncoderFactory) } else if ([info.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecVp9Name)] && [RTC_OBJC_TYPE(RTCVideoEncoderVP9) isSupported]) { return [RTC_OBJC_TYPE(RTCVideoEncoderVP9) vp9Encoder]; + } else if (@available(iOS 11, *)) { + if ([info.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]) { + return [[RTC_OBJC_TYPE(RTCVideoEncoderH265) alloc] initWithCodecInfo:info]; + } } #if defined(RTC_USE_LIBAOM_AV1_ENCODER) diff --git a/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h 
b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h new file mode 100644 index 0000000000..3f295a22a2 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.h @@ -0,0 +1,16 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#import + +#import "RTCMacros.h" + +RTC_OBJC_EXPORT extern NSString *const RTC_CONSTANT_TYPE(RTCVideoCodecH265Name); +RTC_OBJC_EXPORT extern NSString *const RTC_CONSTANT_TYPE(RTCLevel31Main); diff --git a/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm new file mode 100644 index 0000000000..fde3a7d7b2 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCH265ProfileLevelId.mm @@ -0,0 +1,18 @@ +/* + * Copyright 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCH265ProfileLevelId.h" + +#include "media/base/media_constants.h" + +NSString *const RTC_CONSTANT_TYPE(RTCVideoCodecH265Name) = @"H265"; +// TODO(jianjunz): This is value is not correct. 
+NSString *const RTC_CONSTANT_TYPE(RTCLevel31Main) = @"4d001f"; diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderH264.mm b/sdk/objc/components/video_codec/RTCVideoDecoderH264.mm index 21e2b805b0..72527f024b 100644 --- a/sdk/objc/components/video_codec/RTCVideoDecoderH264.mm +++ b/sdk/objc/components/video_codec/RTCVideoDecoderH264.mm @@ -255,11 +255,7 @@ - (int)resetDecompressionSession { - (void)configureDecompressionSession { RTC_DCHECK(_decompressionSession); -#if defined(WEBRTC_IOS) - VTSessionSetProperty(_decompressionSession, - kVTDecompressionPropertyKey_RealTime, - kCFBooleanTrue); -#endif + VTSessionSetProperty(_decompressionSession, kVTDecompressionPropertyKey_RealTime, kCFBooleanTrue); } - (void)destroyDecompressionSession { diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderH265.h b/sdk/objc/components/video_codec/RTCVideoDecoderH265.h new file mode 100644 index 0000000000..6e77bdf40d --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderH265.h @@ -0,0 +1,23 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoDecoder.h" + +RTC_OBJC_EXPORT +@interface RTC_OBJC_TYPE (RTCVideoDecoderH265) : NSObject +- (NSInteger)setHVCCFormat:(const uint8_t *)data size:(size_t)size width:(uint16_t)width height:(uint16_t)height; +- (NSInteger)decodeData:(const uint8_t *)data + size:(size_t)size + timeStamp:(int64_t)timeStamp; +- (void)flush; +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm b/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm new file mode 100644 index 0000000000..49df482977 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoDecoderH265.mm @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCVideoDecoderH265.h" + +#import + +#import +#import "RTCVideoFrameReorderQueue.h" +#import "base/RTCVideoFrame.h" +#import "base/RTCVideoFrameBuffer.h" +#import "common_video/h265/h265_common.h" +#import "common_video/h265/h265_vps_parser.h" +#import "components/video_frame_buffer/RTCCVPixelBuffer.h" +#import "helpers.h" +#import "helpers/scoped_cftyperef.h" +#import "modules/video_coding/include/video_error_codes.h" +#import "nalu_rewriter.h" +#import "rtc_base/bitstream_reader.h" +#import "rtc_base/checks.h" +#import "rtc_base/logging.h" +#import "rtc_base/time_utils.h" + +// Struct that we pass to the decoder per frame to decode. We receive it again +// in the decoder callback. 
+struct RTCH265FrameDecodeParams { + RTCH265FrameDecodeParams(int64_t ts, uint64_t reorderSize) + : timestamp(ts), reorderSize(reorderSize) {} + int64_t timestamp; + uint64_t reorderSize{0}; +}; + +@interface RTC_OBJC_TYPE (RTCVideoDecoderH265) () +- (void)setError:(OSStatus)error; +- (void)processFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)decodedFrame reorderSize:(uint64_t)reorderSize; +@end + +static void overrideColorSpaceAttachments(CVImageBufferRef imageBuffer) { + CVBufferRemoveAttachment(imageBuffer, kCVImageBufferCGColorSpaceKey); + CVBufferSetAttachment(imageBuffer, kCVImageBufferColorPrimariesKey, + kCVImageBufferColorPrimaries_ITU_R_709_2, + kCVAttachmentMode_ShouldPropagate); + CVBufferSetAttachment(imageBuffer, kCVImageBufferTransferFunctionKey, + kCVImageBufferTransferFunction_sRGB, kCVAttachmentMode_ShouldPropagate); + CVBufferSetAttachment(imageBuffer, kCVImageBufferYCbCrMatrixKey, + kCVImageBufferYCbCrMatrix_ITU_R_709_2, kCVAttachmentMode_ShouldPropagate); + CVBufferSetAttachment(imageBuffer, (CFStringRef) @"ColorInfoGuessedBy", + (CFStringRef) @"RTCVideoDecoderH265", kCVAttachmentMode_ShouldPropagate); +} + +std::span vpsDataFromHvcc(const uint8_t *hvccData, size_t hvccDataSize) { + // Avoid copying: parse directly from the provided buffer. 
+ webrtc::BitstreamReader reader( + absl::string_view(reinterpret_cast(hvccData), hvccDataSize)); + + // configuration_version + auto version = reader.Read(); + if (version > 1) { + reader.Ok(); + return {}; + } + // profile_indication + reader.ConsumeBits(8); + // general_profile_compatibility_flags + reader.ConsumeBits(32); + // general_constraint_indicator_flags_hi; + reader.ConsumeBits(32); + // general_constraint_indicator_flags_lo; + reader.ConsumeBits(16); + // general_level_idc; + reader.ConsumeBits(8); + // min_spatial_segmentation_idc + reader.ConsumeBits(16); + // parallelismType; + reader.ConsumeBits(8); + // chromaFormat; + reader.ConsumeBits(8); + // bitDepthLumaMinus8 + reader.ConsumeBits(8); + // bitDepthChromaMinus8 + reader.ConsumeBits(8); + // avgFrameRate + reader.ConsumeBits(16); + // misc + reader.ConsumeBits(8); + auto numOfArrays = reader.Read(); + + if (!reader.Ok()) { + return {}; + } + + for (uint32_t j = 0; j < numOfArrays; j++) { + // NAL_unit_type: bit(1) array_completeness; unsigned int(1) reserved = 0; unsigned int(6) + // NAL_unit_type; + auto nalUnitType = reader.Read() & 0x3F; + // numNalus + auto numOfNalus = reader.Read(); + if (!reader.Ok()) { + return {}; + } + + for (uint32_t k = 0; k < numOfNalus; k++) { + // nalUnitLength + auto size = reader.Read(); + + // Position at the start of NAL unit payload in bytes. 
+ size_t nalStartPos = hvccDataSize - static_cast(reader.RemainingBitCount() / 8); + + // nalUnit payload + reader.ConsumeBits(8 * size); + + static const size_t hevcNalHeaderSize = 2; + if (!reader.Ok() || size <= hevcNalHeaderSize) { + return {}; + } + + if (nalUnitType != webrtc::H265::NaluType::kVps) { + continue; + } + + return {hvccData + nalStartPos + hevcNalHeaderSize, size - hevcNalHeaderSize}; + } + } + reader.Ok(); + return {}; +} + +uint8_t ComputeH265ReorderSizeFromVPS(const uint8_t *spsData, size_t spsDataSize) { + auto parsedVps = webrtc::H265VpsParser::ParseVps(spsData, spsDataSize); + if (!parsedVps) return 0; + + auto reorderSize = *std::max_element( + parsedVps->vps_max_num_reorder_pics, + parsedVps->vps_max_num_reorder_pics + parsedVps->vps_max_sub_layers_minus1 + 1); + // We use a max value of 16 + return std::min(reorderSize, 16u); +} + +uint8_t ComputeH265ReorderSizeFromHVCC(const uint8_t *hvccData, size_t hvccDataSize) { + // FIXME: we should probably get the VPS from the SPS sps_video_parameter_set_id. + auto vpsData = vpsDataFromHvcc(hvccData, hvccDataSize); + if (!vpsData.size()) return 0; + + return ComputeH265ReorderSizeFromVPS(vpsData.data(), vpsData.size()); +} + +uint8_t ComputeH265ReorderSizeFromAnnexB(const uint8_t *annexb_buffer, size_t annexb_buffer_size) { + // FIXME: we should probably get the VPS from the SPS sps_video_parameter_set_id. + webrtc::AnnexBBufferReader bufferReader(annexb_buffer, annexb_buffer_size); + if (!bufferReader.SeekToNextNaluOfType(webrtc::H265::kVps)) return 0; + + static const size_t hevcNalHeaderSize = 2; + const uint8_t *data; + size_t data_len; + if (!bufferReader.ReadNalu(&data, &data_len) || data_len <= hevcNalHeaderSize) return 0; + + return ComputeH265ReorderSizeFromVPS(data + hevcNalHeaderSize, data_len - hevcNalHeaderSize); +} + +// This is the callback function that VideoToolbox calls when decode is +// complete. 
+void h265DecompressionOutputCallback(void *decoderRef, void *params, OSStatus status, + VTDecodeInfoFlags infoFlags, CVImageBufferRef imageBuffer, + CMTime timestamp, CMTime duration) { + std::unique_ptr decodeParams( + reinterpret_cast(params)); + RTC_OBJC_TYPE(RTCVideoDecoderH265) *decoder = + (__bridge RTC_OBJC_TYPE(RTCVideoDecoderH265) *)decoderRef; + if (status != noErr || !imageBuffer) { + [decoder setError:status != noErr ? status : 1]; + RTC_LOG(LS_ERROR) << "Failed to decode frame. Status: " << status; + [decoder processFrame:nil reorderSize:decodeParams->reorderSize]; + return; + } + + overrideColorSpaceAttachments(imageBuffer); + + // TODO(tkchin): Handle CVO properly. + RTC_OBJC_TYPE(RTCCVPixelBuffer) *frameBuffer = + [[RTC_OBJC_TYPE(RTCCVPixelBuffer) alloc] initWithPixelBuffer:imageBuffer]; + RTC_OBJC_TYPE(RTCVideoFrame) *decodedFrame = [[RTC_OBJC_TYPE(RTCVideoFrame) alloc] + initWithBuffer:frameBuffer + rotation:RTC_OBJC_TYPE(RTCVideoRotation_0) + timeStampNs:CMTimeGetSeconds(timestamp) * rtc::kNumNanosecsPerSec]; + decodedFrame.timeStamp = decodeParams->timestamp; + [decoder processFrame:decodedFrame reorderSize:decodeParams->reorderSize]; +} + +// Decoder. 
+@implementation RTC_OBJC_TYPE (RTCVideoDecoderH265) { + CMVideoFormatDescriptionRef _videoFormat; + VTDecompressionSessionRef _decompressionSession; + RTCVideoDecoderCallback _callback; + OSStatus _error; + bool _useHEVC; + webrtc::RTCVideoFrameReorderQueue _reorderQueue; +} + +- (instancetype)init { + self = [super init]; + if (self) { + _useHEVC = false; + } + + return self; +} + +- (void)dealloc { + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; +} + +- (NSInteger)startDecodeWithNumberOfCores:(int)numberOfCores { + return WEBRTC_VIDEO_CODEC_OK; +} + +CMSampleBufferRef H265BufferToCMSampleBuffer(const uint8_t *buffer, size_t buffer_size, + CMVideoFormatDescriptionRef video_format) + CF_RETURNS_RETAINED { + CMBlockBufferRef new_block_buffer; + if (auto error = CMBlockBufferCreateWithMemoryBlock( + kCFAllocatorDefault, NULL, buffer_size, kCFAllocatorDefault, NULL, 0, buffer_size, + kCMBlockBufferAssureMemoryNowFlag, &new_block_buffer)) { + RTC_LOG(LS_ERROR) + << "H265BufferToCMSampleBuffer CMBlockBufferCreateWithMemoryBlock failed with: " << error; + return nullptr; + } + auto block_buffer = rtc::ScopedCF(new_block_buffer); + + if (auto error = CMBlockBufferReplaceDataBytes(buffer, block_buffer.get(), 0, buffer_size)) { + RTC_LOG(LS_ERROR) << "H265BufferToCMSampleBuffer CMBlockBufferReplaceDataBytes failed with: " + << error; + return nullptr; + } + + CMSampleBufferRef sample_buffer = nullptr; + if (auto error = + CMSampleBufferCreate(kCFAllocatorDefault, block_buffer.get(), true, nullptr, nullptr, + video_format, 1, 0, nullptr, 0, nullptr, &sample_buffer)) { + RTC_LOG(LS_ERROR) << "H265BufferToCMSampleBuffer CMSampleBufferCreate failed with: " << error; + return nullptr; + } + return sample_buffer; +} + +- (NSInteger)decode:(RTC_OBJC_TYPE(RTCEncodedImage) *)inputImage + missingFrames:(BOOL)missingFrames + codecSpecificInfo:(__nullable id)info + renderTimeMs:(int64_t)renderTimeMs { + RTC_DCHECK(inputImage.buffer); + return [self 
decodeData:(uint8_t *)inputImage.buffer.bytes + size:inputImage.buffer.length + timeStamp:inputImage.timeStamp]; +} + +- (NSInteger)decodeData:(const uint8_t *)data size:(size_t)size timeStamp:(int64_t)timeStamp { + if (_error != noErr) { + RTC_LOG(LS_WARNING) << "Last frame decode failed."; + _error = noErr; + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (!data || !size) { + RTC_LOG(LS_WARNING) << "Empty frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + if (!_useHEVC) { + rtc::ScopedCFTypeRef inputFormat = + rtc::ScopedCF(webrtc::CreateH265VideoFormatDescription((uint8_t *)data, size)); + if (inputFormat) { + _reorderQueue.setReorderSize(ComputeH265ReorderSizeFromAnnexB(data, size)); + + CMVideoDimensions dimensions = CMVideoFormatDescriptionGetDimensions(inputFormat.get()); + RTC_LOG(LS_INFO) << "Resolution: " << dimensions.width << " x " << dimensions.height; + // Check if the video format has changed, and reinitialize decoder if needed. + if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) { + [self setVideoFormat:inputFormat.get()]; + int resetDecompressionSessionError = [self resetDecompressionSession]; + if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) { + return resetDecompressionSessionError; + } + } + } + } + if (!_videoFormat) { + // We received a frame but we don't have format information so we can't + // decode it. + // This can happen after backgrounding. We need to wait for the next + // sps/pps before we can resume so we request a keyframe by returning an + // error. + RTC_LOG(LS_WARNING) << "Missing video format. 
Frame with sps/pps required."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + + CMSampleBufferRef sampleBuffer = nullptr; + if (_useHEVC) { + sampleBuffer = H265BufferToCMSampleBuffer(data, size, _videoFormat); + if (!sampleBuffer) return WEBRTC_VIDEO_CODEC_ERROR; + } else if (!webrtc::H265AnnexBBufferToCMSampleBuffer((uint8_t *)data, size, _videoFormat, + &sampleBuffer)) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(sampleBuffer); + VTDecodeFrameFlags decodeFlags = kVTDecodeFrame_EnableAsynchronousDecompression; + std::unique_ptr frameDecodeParams; + frameDecodeParams.reset(new RTCH265FrameDecodeParams(timeStamp, _reorderQueue.reorderSize())); + OSStatus status = VTDecompressionSessionDecodeFrame( + _decompressionSession, sampleBuffer, decodeFlags, frameDecodeParams.release(), nullptr); + // Re-initialize the decoder if we have an invalid session while the app is + // active and retry the decode request. + if (status == kVTInvalidSessionErr && [self resetDecompressionSession] == WEBRTC_VIDEO_CODEC_OK) { + frameDecodeParams.reset(new RTCH265FrameDecodeParams(timeStamp, _reorderQueue.reorderSize())); + status = VTDecompressionSessionDecodeFrame(_decompressionSession, sampleBuffer, decodeFlags, + frameDecodeParams.release(), nullptr); + } + CFRelease(sampleBuffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to decode frame with code: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +- (NSInteger)setHVCCFormat:(const uint8_t *)data + size:(size_t)size + width:(uint16_t)width + height:(uint16_t)height { + CFStringRef avcCString = (CFStringRef) @"hvcC"; + CFDataRef codecConfig = CFDataCreate(kCFAllocatorDefault, data, size); + CFDictionaryRef atomsDict = + CFDictionaryCreate(NULL, (const void **)&avcCString, (const void **)&codecConfig, 1, + &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); + CFDictionaryRef extensionsDict = CFDictionaryCreate( + NULL, (const void 
**)&kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms, + (const void **)&atomsDict, 1, &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + + CMVideoFormatDescriptionRef videoFormatDescription = nullptr; + auto err = CMVideoFormatDescriptionCreate(NULL, kCMVideoCodecType_HEVC, width, height, + extensionsDict, &videoFormatDescription); + CFRelease(codecConfig); + CFRelease(atomsDict); + CFRelease(extensionsDict); + + if (err) { + RTC_LOG(LS_ERROR) << "Cannot create fromat description."; + return err; + } + + rtc::ScopedCFTypeRef inputFormat = + rtc::ScopedCF(videoFormatDescription); + if (inputFormat) { + _reorderQueue.setReorderSize(ComputeH265ReorderSizeFromHVCC(data, size)); + + // Check if the video format has changed, and reinitialize decoder if + // needed. + if (!CMFormatDescriptionEqual(inputFormat.get(), _videoFormat)) { + [self setVideoFormat:inputFormat.get()]; + int resetDecompressionSessionError = [self resetDecompressionSession]; + if (resetDecompressionSessionError != WEBRTC_VIDEO_CODEC_OK) { + return resetDecompressionSessionError; + } + } + } + _useHEVC = true; + return 0; +} + +- (void)setCallback:(RTCVideoDecoderCallback)callback { + _callback = callback; +} + +- (void)setError:(OSStatus)error { + _error = error; +} + +- (NSInteger)releaseDecoder { + // Need to invalidate the session so that callbacks no longer occur and it + // is safe to null out the callback. + [self destroyDecompressionSession]; + [self setVideoFormat:nullptr]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +#pragma mark - Private + +- (int)resetDecompressionSession { + [self destroyDecompressionSession]; + + // Need to wait for the first SPS to initialize decoder. + if (!_videoFormat) { + return WEBRTC_VIDEO_CODEC_OK; + } + + // Set keys for OpenGL and IOSurface compatibilty, which makes the encoder + // create pixel buffers with GPU backed memory. 
The intent here is to pass + // the pixel buffers directly so we avoid a texture upload later during + // rendering. This currently is moot because we are converting back to an + // I420 frame after decode, but eventually we will be able to plumb + // CVPixelBuffers directly to the renderer. + // TODO(tkchin): Maybe only set OpenGL/IOSurface keys if we know that that + // we can pass CVPixelBuffers as native handles in decoder output. + static size_t const attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_MAC) || defined(WEBRTC_MAC_CATALYST) + kCVPixelBufferOpenGLCompatibilityKey, +#elif defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, kCVPixelBufferPixelFormatTypeKey}; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, pixelFormat}; + CFDictionaryRef attributes = CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + VTDecompressionOutputCallbackRecord record = { + h265DecompressionOutputCallback, + (__bridge void *)self, + }; + OSStatus status = VTDecompressionSessionCreate(nullptr, _videoFormat, nullptr, attributes, + &record, &_decompressionSession); + CFRelease(attributes); + if (status != noErr) { + [self destroyDecompressionSession]; + return WEBRTC_VIDEO_CODEC_ERROR; + } + [self configureDecompressionSession]; + + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureDecompressionSession { + RTC_DCHECK(_decompressionSession); + VTSessionSetProperty(_decompressionSession, kVTDecompressionPropertyKey_RealTime, kCFBooleanTrue); +} + +- (void)destroyDecompressionSession 
{ + if (_decompressionSession) { + VTDecompressionSessionWaitForAsynchronousFrames(_decompressionSession); + VTDecompressionSessionInvalidate(_decompressionSession); + CFRelease(_decompressionSession); + _decompressionSession = nullptr; + } +} + +- (void)flush { + if (_decompressionSession) VTDecompressionSessionWaitForAsynchronousFrames(_decompressionSession); + + while (auto *frame = _reorderQueue.takeIfAny()) { + _callback(frame); + } +} + +- (void)setVideoFormat:(CMVideoFormatDescriptionRef)videoFormat { + if (_videoFormat == videoFormat) { + return; + } + if (_videoFormat) { + CFRelease(_videoFormat); + } + _videoFormat = videoFormat; + if (_videoFormat) { + CFRetain(_videoFormat); + } +} + +- (NSString *)implementationName { + return @"VideoToolbox"; +} + +- (void)processFrame:(RTC_OBJC_TYPE(RTCVideoFrame) *)decodedFrame + reorderSize:(uint64_t)reorderSize { + // FIXME: In case of IDR, we could push out all queued frames. + if (!_reorderQueue.isEmpty() || reorderSize) { + _reorderQueue.append(decodedFrame, reorderSize); + while (auto *frame = _reorderQueue.takeIfAvailable()) { + _callback(frame); + } + return; + } + _callback(decodedFrame); +} + +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderFactorySimulcast.mm b/sdk/objc/components/video_codec/RTCVideoEncoderFactorySimulcast.mm index e9c9c5bde3..1b5c5d9544 100644 --- a/sdk/objc/components/video_codec/RTCVideoEncoderFactorySimulcast.mm +++ b/sdk/objc/components/video_codec/RTCVideoEncoderFactorySimulcast.mm @@ -1,5 +1,7 @@ #import +#import "RTCH264ProfileLevelId.h" +#import "RTCH265ProfileLevelId.h" #import "RTCMacros.h" #import "RTCVideoCodecInfo.h" #import "RTCVideoEncoderFactorySimulcast.h" @@ -57,6 +59,11 @@ - (instancetype)initWithPrimary:(id)prima RTC_OBJC_TYPE(RTCVideoCodecInfo) *av1Codec = [[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] initWithNativeSdpVideoFormat: av1Format]; [addingCodecs addObject: av1Codec]; + // H265 + auto *h265Codec = 
[[RTC_OBJC_TYPE(RTCVideoCodecInfo) alloc] + initWithName:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]; + [addingCodecs addObject:h265Codec]; + return [supportedCodecs arrayByAddingObjectsFromArray: addingCodecs]; } diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm b/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm index f7771532ba..0b3fc0bad7 100644 --- a/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm +++ b/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm @@ -736,20 +736,21 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat { (NSString *)kCVPixelBufferPixelFormatTypeKey : @(framePixelFormat), }; - NSMutableDictionary *encoder_specs; -#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) - // Currently hw accl is supported above 360p on mac, below 360p - // the compression session will be created with hw accl disabled. - encoder_specs = [@{ - (NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES), - } mutableCopy]; + NSMutableDictionary *encoder_specs = [@{} mutableCopy]; + + if (@available(iOS 17.4, macCatalyst 17.4, macOS 10.9, tvOS 17.4, visionOS 1.1, *)) { + [encoder_specs addEntriesFromDictionary:@{ + (NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES), + }]; + } + // Enable low-latency video encoding - if (@available(iOS 14.5, macOS 11.3, *)) { + if (@available(iOS 14.5, macCatalyst 14.5, macOS 11.3, tvOS 14.5, visionOS 1.0, *)) { [encoder_specs addEntriesFromDictionary:@{ (NSString *)kVTVideoEncoderSpecification_EnableLowLatencyRateControl : @(YES), }]; } -#endif + OSStatus status = VTCompressionSessionCreate( nullptr, // use default allocator _width, @@ -766,19 +767,17 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat { RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status; return WEBRTC_VIDEO_CODEC_ERROR; } -#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) - CFBooleanRef hwaccl_enabled = nullptr; - 
status = VTSessionCopyProperty( - _compressionSession, - kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, - nullptr, - &hwaccl_enabled); - if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) { - RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled"; - } else { - RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled"; + if (@available(iOS 17.4, macCatalyst 17.4, macOS 10.9, tvOS 17.4, visionOS 1.1, *)) { + CFBooleanRef hwaccl_enabled = nullptr; + status = VTSessionCopyProperty(_compressionSession, + kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, + kCFAllocatorDefault, &hwaccl_enabled); + if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) { + RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled"; + } else { + RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled"; + } } -#endif [self configureCompressionSession]; return WEBRTC_VIDEO_CODEC_OK; diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderH265.h b/sdk/objc/components/video_codec/RTCVideoEncoderH265.h new file mode 100644 index 0000000000..1d2436993e --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderH265.h @@ -0,0 +1,22 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#import + +#import "RTCMacros.h" +#import "RTCVideoCodecInfo.h" +#import "RTCVideoEncoder.h" + +RTC_OBJC_EXPORT +@interface RTC_OBJC_TYPE (RTCVideoEncoderH265) : NSObject + +- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo; +- (void)flush; +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm b/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm new file mode 100644 index 0000000000..943b07abcd --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoEncoderH265.mm @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCVideoEncoderH265.h" + +#import +#include + +#import "RTCCodecSpecificInfoH265.h" +// #import "api/peerconnection/RTCRtpFragmentationHeader+Private.h" +#import "api/peerconnection/RTCVideoCodecInfo+Private.h" +#import "base/RTCI420Buffer.h" +#import "base/RTCVideoFrame.h" +#import "base/RTCVideoFrameBuffer.h" +#import "components/video_frame_buffer/RTCCVPixelBuffer.h" +#import "helpers.h" +#if defined(WEBRTC_IOS) +#import "helpers/UIDevice+RTCDevice.h" +#endif +#import "RTCH265ProfileLevelId.h" + +#include "common_video/h265/h265_bitstream_parser.h" +#include "common_video/include/bitrate_adjuster.h" +#include "libyuv/convert_from.h" +#include "modules/include/module_common_types.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/buffer.h" +#include "rtc_base/logging.h" +#include "rtc_base/time_utils.h" +#include "sdk/objc/Framework/Classes/VideoToolbox/nalu_rewriter.h" +#include "system_wrappers/include/clock.h" + +@interface 
RTC_OBJC_TYPE (RTCVideoEncoderH265) () + +- (void)frameWasEncoded:(OSStatus)status + flags:(VTEncodeInfoFlags)infoFlags + sampleBuffer:(CMSampleBufferRef)sampleBuffer + width:(int32_t)width + height:(int32_t)height + renderTimeMs:(int64_t)renderTimeMs + timestamp:(uint32_t)timestamp + rotation:(RTC_OBJC_TYPE(RTCVideoRotation))rotation; +@end + +namespace { // anonymous namespace + +// These thresholds deviate from the default h265 QP thresholds, as they +// have been found to work better on devices that support VideoToolbox +const int kLowh265QpThreshold = 28; +const int kHighh265QpThreshold = 39; + +// Struct that we pass to the encoder per frame to encode. We receive it again +// in the encoder callback. +struct API_AVAILABLE(ios(11.0)) RTC_OBJC_TYPE(RTCFrameEncodeParams) { + RTC_OBJC_TYPE(RTCFrameEncodeParams)(RTC_OBJC_TYPE(RTCVideoEncoderH265) * e, int32_t w, int32_t h, + int64_t rtms, uint32_t ts, RTC_OBJC_TYPE(RTCVideoRotation) r) + : encoder(e), width(w), height(h), render_time_ms(rtms), timestamp(ts), rotation(r) {} + + RTC_OBJC_TYPE(RTCVideoEncoderH265) * encoder; + int32_t width; + int32_t height; + int64_t render_time_ms; + uint32_t timestamp; + RTC_OBJC_TYPE(RTCVideoRotation) rotation; +}; + +// We receive I420Frames as input, but we need to feed CVPixelBuffers into the +// encoder. This performs the copy and format conversion. +// TODO(tkchin): See if encoder will accept i420 frames and compare performance. 
+bool CopyVideoFrameToPixelBuffer(id frameBuffer, + CVPixelBufferRef pixelBuffer) { + RTC_DCHECK(pixelBuffer); + RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(pixelBuffer), + kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); + RTC_DCHECK_EQ(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0), frameBuffer.height); + RTC_DCHECK_EQ(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0), frameBuffer.width); + + CVReturn cvRet = CVPixelBufferLockBaseAddress(pixelBuffer, 0); + if (cvRet != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to lock base address: " << cvRet; + return false; + } + + uint8_t* dstY = reinterpret_cast(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0)); + int dstStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0); + uint8_t* dstUV = reinterpret_cast(CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1)); + int dstStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1); + // Convert I420 to NV12. + int ret = + libyuv::I420ToNV12(frameBuffer.dataY, frameBuffer.strideY, frameBuffer.dataU, + frameBuffer.strideU, frameBuffer.dataV, frameBuffer.strideV, dstY, + dstStrideY, dstUV, dstStrideUV, frameBuffer.width, frameBuffer.height); + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); + if (ret) { + RTC_LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret; + return false; + } + return true; +} + +CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) { + if (!pixel_buffer_pool) { + RTC_LOG(LS_ERROR) << "Failed to get pixel buffer pool."; + return nullptr; + } + CVPixelBufferRef pixel_buffer; + CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, &pixel_buffer); + if (ret != kCVReturnSuccess) { + RTC_LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret; + // We probably want to drop frames here, since failure probably means + // that the pool is empty. + return nullptr; + } + return pixel_buffer; +} + +// This is the callback function that VideoToolbox calls when encode is +// complete. 
From inspection this happens on its own queue. +void compressionOutputCallback(void* encoder, void* params, OSStatus status, + VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) + API_AVAILABLE(ios(11.0)) { + RTC_CHECK(params); + std::unique_ptr encodeParams( + reinterpret_cast(params)); + RTC_CHECK(encodeParams->encoder); + [encodeParams->encoder frameWasEncoded:status + flags:infoFlags + sampleBuffer:sampleBuffer + width:encodeParams->width + height:encodeParams->height + renderTimeMs:encodeParams->render_time_ms + timestamp:encodeParams->timestamp + rotation:encodeParams->rotation]; +} +} // namespace + +@implementation RTC_OBJC_TYPE (RTCVideoEncoderH265) { + RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo; + std::unique_ptr _bitrateAdjuster; + uint32_t _targetBitrateBps; + uint32_t _encoderBitrateBps; + CFStringRef _profile; + RTCVideoEncoderCallback _callback; + int32_t _width; + int32_t _height; + VTCompressionSessionRef _compressionSession; + RTC_OBJC_TYPE(RTCVideoCodecMode) _mode; + int framesLeft; + std::vector _nv12ScaleBuffer; + webrtc::H265BitstreamParser _h265BitstreamParser; +} + +// .5 is set as a mininum to prevent overcompensating for large temporary +// overshoots. We don't want to degrade video quality too badly. +// .95 is set to prevent oscillations. When a lower bitrate is set on the +// encoder than previously set, its output seems to have a brief period of +// drastically reduced bitrate, so we want to avoid that. In steady state +// conditions, 0.95 seems to give us better overall bitrate over long periods +// of time. +- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo { + NSParameterAssert(codecInfo); + self = [super init]; + if (self) { + _codecInfo = codecInfo; + _bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95)); + // AnnexB and low latency are always enabled. 
+ RTC_CHECK([codecInfo.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]); + } + + return self; +} + +- (void)dealloc { + [self destroyCompressionSession]; +} + +- (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)settings + numberOfCores:(int)numberOfCores { + RTC_DCHECK(settings); + RTC_DCHECK([settings.name isEqualToString:RTC_CONSTANT_TYPE(RTCVideoCodecH265Name)]); + + _width = settings.width; + _height = settings.height; + _mode = settings.mode; + + // We can only set average bitrate on the HW encoder. + _targetBitrateBps = settings.startBitrate; + _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); + + return [self resetCompressionSession]; +} + +// AnnexB and low latency are always enabled; setters removed. + +- (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame + codecSpecificInfo:(nullable id)codecSpecificInfo + frameTypes:(NSArray*)frameTypes { + if (!_callback || !_compressionSession) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + BOOL isKeyframeRequired = NO; + + // Get a pixel buffer from the pool and copy frame data over. + CVPixelBufferPoolRef pixelBufferPool = + VTCompressionSessionGetPixelBufferPool(_compressionSession); + +#if defined(WEBRTC_IOS) + if (!pixelBufferPool) { + // Kind of a hack. On backgrounding, the compression session seems to get + // invalidated, which causes this pool call to fail when the application + // is foregrounded and frames are being sent for encoding again. + // Resetting the session when this happens fixes the issue. + // In addition we request a keyframe so video can recover quickly. 
+ [self resetCompressionSession]; + pixelBufferPool = VTCompressionSessionGetPixelBufferPool(_compressionSession); + isKeyframeRequired = YES; + RTC_LOG(LS_INFO) << "Resetting compression session due to invalid pool."; + } +#endif + + CVPixelBufferRef pixelBuffer = nullptr; + if ([frame.buffer isKindOfClass:[RTC_OBJC_TYPE(RTCCVPixelBuffer) class]]) { + // Native frame buffer + RTC_OBJC_TYPE(RTCCVPixelBuffer)* rtcPixelBuffer = + (RTC_OBJC_TYPE(RTCCVPixelBuffer)*)frame.buffer; + if (![rtcPixelBuffer requiresCropping]) { + // This pixel buffer might have a higher resolution than what the + // compression session is configured to. The compression session can + // handle that and will output encoded frames in the configured + // resolution regardless of the input pixel buffer resolution. + pixelBuffer = rtcPixelBuffer.pixelBuffer; + CVBufferRetain(pixelBuffer); + } else { + // Cropping required, we need to crop and scale to a new pixel buffer. + pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + int dstWidth = CVPixelBufferGetWidth(pixelBuffer); + int dstHeight = CVPixelBufferGetHeight(pixelBuffer); + if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) { + const int requiredSize = [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth + height:dstHeight]; + if (static_cast(_nv12ScaleBuffer.size()) < requiredSize) { + _nv12ScaleBuffer.resize(requiredSize); + } + } + if (![rtcPixelBuffer cropAndScaleTo:pixelBuffer withTempBuffer:_nv12ScaleBuffer.data()]) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + } + + if (!pixelBuffer) { + // We did not have a native frame buffer + RTC_DCHECK_EQ(frame.width, _width); + RTC_DCHECK_EQ(frame.height, _height); + + pixelBuffer = CreatePixelBuffer(pixelBufferPool); + if (!pixelBuffer) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + RTC_DCHECK(pixelBuffer); + if (!CopyVideoFrameToPixelBuffer([frame.buffer toI420], pixelBuffer)) { + RTC_LOG(LS_ERROR) << 
"Failed to copy frame data."; + CVBufferRelease(pixelBuffer); + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + + // Check if we need a keyframe. + if (!isKeyframeRequired && frameTypes) { + for (NSNumber* frameType in frameTypes) { + if ((RTC_OBJC_TYPE(RTCFrameType))frameType.intValue == + RTC_OBJC_TYPE(RTCFrameTypeVideoFrameKey)) { + isKeyframeRequired = YES; + break; + } + } + } + + CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000); + CFDictionaryRef frameProperties = nullptr; + if (isKeyframeRequired) { + // Reuse a static dictionary to avoid per-frame allocations. + static CFDictionaryRef forceKeyframeProps = []() { + CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame}; + CFTypeRef values[] = {kCFBooleanTrue}; + CFDictionaryRef dict = CreateCFTypeDictionary(keys, values, 1); + // Intentionally leaked for process lifetime reuse. + return dict; + }(); + frameProperties = forceKeyframeProps; + } + + std::unique_ptr encodeParams; + encodeParams.reset(new RTC_OBJC_TYPE(RTCFrameEncodeParams)( + self, _width, _height, frame.timeStampNs / rtc::kNumNanosecsPerMillisec, frame.timeStamp, + frame.rotation)); + + // Update the bitrate if needed. + [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()]; + + OSStatus status = VTCompressionSessionEncodeFrame( + _compressionSession, pixelBuffer, presentationTimeStamp, kCMTimeInvalid, frameProperties, + encodeParams.release(), nullptr); + // Do not release `frameProperties` when using the cached dictionary. 
+ if (pixelBuffer) { + CVBufferRelease(pixelBuffer); + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to encode frame with code: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)setCallback:(RTCVideoEncoderCallback)callback { + _callback = callback; +} + +- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate { + _targetBitrateBps = 1000 * bitrateKbit; + _bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); + [self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps()]; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (NSInteger)resolutionAlignment { + return 1; +} + +- (BOOL)applyAlignmentToAllSimulcastLayers { + return NO; +} + +- (BOOL)supportsNativeHandle { + return YES; +} + +#pragma mark - Private + +- (NSInteger)releaseEncoder { + // Need to destroy so that the session is invalidated and won't use the + // callback anymore. Do not remove callback until the session is invalidated + // since async encoder callbacks can occur until invalidation. + [self destroyCompressionSession]; + _callback = nullptr; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (int)resetCompressionSession { + [self destroyCompressionSession]; + + // Set source image buffer attributes. These attributes will be present on + // buffers retrieved from the encoder's pixel buffer pool. 
+ const size_t attributesSize = 3; + CFTypeRef keys[attributesSize] = { +#if defined(WEBRTC_MAC) || defined(WEBRTC_MAC_CATALYST) + kCVPixelBufferOpenGLCompatibilityKey, +#elif defined(WEBRTC_IOS) + kCVPixelBufferOpenGLESCompatibilityKey, +#endif + kCVPixelBufferIOSurfacePropertiesKey, kCVPixelBufferPixelFormatTypeKey}; + CFDictionaryRef ioSurfaceValue = CreateCFTypeDictionary(nullptr, nullptr, 0); + int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; + CFNumberRef pixelFormat = CFNumberCreate(nullptr, kCFNumberLongType, &nv12type); + CFTypeRef values[attributesSize] = {kCFBooleanTrue, ioSurfaceValue, pixelFormat}; + CFDictionaryRef sourceAttributes = CreateCFTypeDictionary(keys, values, attributesSize); + if (ioSurfaceValue) { + CFRelease(ioSurfaceValue); + ioSurfaceValue = nullptr; + } + if (pixelFormat) { + CFRelease(pixelFormat); + pixelFormat = nullptr; + } + CFMutableDictionaryRef encoder_specs = CFDictionaryCreateMutable( + nullptr, 2, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); + + if (@available(iOS 17.4, macCatalyst 17.4, macOS 10.9, tvOS 17.4, visionOS 1.1, *)) { + CFDictionarySetValue(encoder_specs, + kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, + kCFBooleanTrue); + } + + if (@available(iOS 14.5, macCatalyst 14.5, macOS 11.3, tvOS 14.5, visionOS 1.0, *)) { + CFDictionarySetValue(encoder_specs, kVTVideoEncoderSpecification_EnableLowLatencyRateControl, + kCFBooleanTrue); + } + + OSStatus status = + VTCompressionSessionCreate(nullptr, // use default allocator + _width, _height, kCMVideoCodecType_HEVC, + encoder_specs, // use hardware accelerated encoder if available + sourceAttributes, + nullptr, // use default compressed data allocator + compressionOutputCallback, nullptr, &_compressionSession); + if (status != noErr) { + status = + VTCompressionSessionCreate(nullptr, // use default allocator + _width, _height, kCMVideoCodecType_HEVC, + encoder_specs, // use hardware accelerated encoder if 
available + sourceAttributes, + nullptr, // use default compressed data allocator + compressionOutputCallback, nullptr, &_compressionSession); + } + if (sourceAttributes) { + CFRelease(sourceAttributes); + sourceAttributes = nullptr; + } + if (encoder_specs) { + CFRelease(encoder_specs); + encoder_specs = nullptr; + } + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create compression session: " << status; + return WEBRTC_VIDEO_CODEC_ERROR; + } + if (@available(iOS 17.4, macCatalyst 17.4, macOS 10.9, tvOS 17.4, visionOS 1.1, *)) { + CFBooleanRef hwaccl_enabled = nullptr; + status = VTSessionCopyProperty(_compressionSession, + kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder, + kCFAllocatorDefault, &hwaccl_enabled); + if (status == noErr && (CFBooleanGetValue(hwaccl_enabled))) { + RTC_LOG(LS_INFO) << "Compression session created with hw accl enabled"; + } else { + RTC_LOG(LS_INFO) << "Compression session created with hw accl disabled"; + } + } + [self configureCompressionSession]; + return WEBRTC_VIDEO_CODEC_OK; +} + +- (void)configureCompressionSession { + RTC_DCHECK(_compressionSession); + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true); + // SetVTSessionProperty(_compressionSession, + // kVTCompressionPropertyKey_ProfileLevel, _profile); + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false); + // Set maximum QP for screen sharing mode on supported OS versions. 
+ // https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_maxallowedframeqp + if (@available(iOS 15.0, macOS 12.0, *)) { + if (_mode == RTC_OBJC_TYPE(RTCVideoCodecModeScreensharing)) { + RTC_LOG(LS_INFO) << "Configuring VideoToolbox to use maxQP: " << kHighh265QpThreshold + << " mode: " << _mode; + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxAllowedFrameQP, + kHighh265QpThreshold); + } + } + // Reduce the encoder's internal buffering for lower latency if available. + // kVTCompressionPropertyKey_MaxFrameDelayCount is supported on macOS/iOS for HEVC. + // SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxFrameDelayCount, 1); + [self setEncoderBitrateBps:_targetBitrateBps]; + + // Set a relatively large value for keyframe emission (7200 frames or 4 minutes). + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameInterval, 7200); + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, + 240); + OSStatus status = VTCompressionSessionPrepareToEncodeFrames(_compressionSession); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Compression session failed to prepare encode frames."; + } +} + +- (void)destroyCompressionSession { + if (_compressionSession) { + VTCompressionSessionInvalidate(_compressionSession); + CFRelease(_compressionSession); + _compressionSession = nullptr; + } +} + +- (NSString*)implementationName { + return @"VideoToolbox"; +} + +- (void)setBitrateBps:(uint32_t)bitrateBps { + if (_encoderBitrateBps != bitrateBps) { + [self setEncoderBitrateBps:bitrateBps]; + } +} + +- (void)setEncoderBitrateBps:(uint32_t)bitrateBps { + if (_compressionSession) { + SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps); + _encoderBitrateBps = bitrateBps; + } +} + +- (void)frameWasEncoded:(OSStatus)status + flags:(VTEncodeInfoFlags)infoFlags + sampleBuffer:(CMSampleBufferRef)sampleBuffer + 
width:(int32_t)width + height:(int32_t)height + renderTimeMs:(int64_t)renderTimeMs + timestamp:(uint32_t)timestamp + rotation:(RTC_OBJC_TYPE(RTCVideoRotation))rotation { + if (status != noErr) { + RTC_LOG(LS_ERROR) << "h265 encode failed."; + return; + } + if (infoFlags & kVTEncodeInfo_FrameDropped) { + RTC_LOG(LS_INFO) << "h265 encoder dropped a frame."; + return; + } + + BOOL isKeyframe = NO; + CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0); + if (attachments != nullptr && CFArrayGetCount(attachments)) { + CFDictionaryRef attachment = + static_cast(CFArrayGetValueAtIndex(attachments, 0)); + isKeyframe = !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync); + } + + if (isKeyframe) { + RTC_LOG(LS_INFO) << "Generated keyframe"; + } + + __block std::unique_ptr buffer = std::make_unique(); + // Always using AnnexB format for bitstream parsing and output + if (!webrtc::H265CMSampleBufferToAnnexBBuffer(sampleBuffer, isKeyframe, buffer.get())) { + RTC_LOG(LS_WARNING) << "Unable to parse H265 encoded buffer"; + return; + } + + RTC_OBJC_TYPE(RTCEncodedImage)* frame = [[RTC_OBJC_TYPE(RTCEncodedImage) alloc] init]; + // This assumes ownership of `buffer` and is responsible for freeing it when done. + frame.buffer = [[NSData alloc] initWithBytesNoCopy:buffer->data() + length:buffer->size() + deallocator:^(void* bytes, NSUInteger size) { + buffer.reset(); + }]; + frame.encodedWidth = width; + frame.encodedHeight = height; + frame.frameType = isKeyframe ? RTC_OBJC_TYPE(RTCFrameTypeVideoFrameKey) + : RTC_OBJC_TYPE(RTCFrameTypeVideoFrameDelta); + frame.captureTimeMs = renderTimeMs; + frame.timeStamp = timestamp; + frame.rotation = rotation; + frame.contentType = (_mode == RTC_OBJC_TYPE(RTCVideoCodecModeScreensharing)) + ? 
RTC_OBJC_TYPE(RTCVideoContentTypeScreenshare) + : RTC_OBJC_TYPE(RTCVideoContentTypeUnspecified); + frame.flags = webrtc::VideoSendTiming::kInvalid; + + // Always using AnnexB format for QP parsing + _h265BitstreamParser.ParseBitstream(*buffer); + auto qp = _h265BitstreamParser.GetLastSliceQp(); + frame.qp = @(qp.value_or(0)); + + BOOL res = _callback(frame, [[RTC_OBJC_TYPE(RTCCodecSpecificInfoH265) alloc] init]); + if (!res) { + RTC_LOG(LS_ERROR) << "Encode callback failed."; + return; + } + _bitrateAdjuster->Update(frame.buffer.length); +} + +- (RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings { + return [[RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) alloc] + initWithThresholdsLow:kLowh265QpThreshold + high:kHighh265QpThreshold]; +} + +- (void)flush { + if (_compressionSession) VTCompressionSessionCompleteFrames(_compressionSession, kCMTimeInvalid); +} + +@end \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.h b/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.h new file mode 100644 index 0000000000..d49c674a28 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2023 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#import "base/RTCVideoFrame.h" +#include +#include "rtc_base/synchronization/mutex.h" + +namespace webrtc { + +class RTCVideoFrameReorderQueue { +public: + RTCVideoFrameReorderQueue() = default; + + struct RTC_OBJC_TYPE(RTCVideoFrameWithOrder) { + RTC_OBJC_TYPE(RTCVideoFrameWithOrder)(RTC_OBJC_TYPE(RTCVideoFrame) * frame, uint64_t reorderSize) + : frame((__bridge_retained void*)frame) + , timeStamp(frame.timeStamp) + , reorderSize(reorderSize) + { + } + + ~RTC_OBJC_TYPE(RTCVideoFrameWithOrder)() + { + if (frame) + take(); + } + + RTC_OBJC_TYPE(RTCVideoFrame) * take() + { + auto* rtcFrame = (__bridge_transfer RTC_OBJC_TYPE(RTCVideoFrame) *)frame; + frame = nullptr; + return rtcFrame; + } + + void* frame; + uint64_t timeStamp; + uint64_t reorderSize; + }; + + bool isEmpty(); + uint8_t reorderSize() const; + void setReorderSize(uint8_t); + void append(RTC_OBJC_TYPE(RTCVideoFrame) *, uint8_t); + RTC_OBJC_TYPE(RTCVideoFrame) *takeIfAvailable(); + RTC_OBJC_TYPE(RTCVideoFrame) *takeIfAny(); + +private: + std::deque> _reorderQueue; + uint8_t _reorderSize { 0 }; + mutable webrtc::Mutex _reorderQueueLock; +}; + +} \ No newline at end of file diff --git a/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.mm 
b/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.mm new file mode 100644 index 0000000000..7ad7487ac0 --- /dev/null +++ b/sdk/objc/components/video_codec/RTCVideoFrameReorderQueue.mm @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#import "RTCVideoFrameReorderQueue.h" +#include + +namespace webrtc { + +bool RTCVideoFrameReorderQueue::isEmpty() { + webrtc::MutexLock lock(&_reorderQueueLock); + return _reorderQueue.empty(); +} + +uint8_t RTCVideoFrameReorderQueue::reorderSize() const { + webrtc::MutexLock lock(&_reorderQueueLock); + return _reorderSize; +} + +void RTCVideoFrameReorderQueue::setReorderSize(uint8_t size) { + webrtc::MutexLock lock(&_reorderQueueLock); + _reorderSize = size; +} + +void RTCVideoFrameReorderQueue::append(RTC_OBJC_TYPE(RTCVideoFrame) * frame, uint8_t reorderSize) { + webrtc::MutexLock lock(&_reorderQueueLock); + auto newEntry = std::make_unique(frame, reorderSize); + const uint64_t ts = newEntry->timeStamp; + + // Keep queue sorted by timestamp with O(n) insertion instead of sorting + // the entire container each time. 
+ auto it = std::upper_bound( + _reorderQueue.begin(), _reorderQueue.end(), ts, + [](const uint64_t value, const std::unique_ptr &elem) { + return value < elem->timeStamp; + }); + _reorderQueue.insert(it, std::move(newEntry)); +} + +RTC_OBJC_TYPE(RTCVideoFrame) * RTCVideoFrameReorderQueue::takeIfAvailable() { + webrtc::MutexLock lock(&_reorderQueueLock); + if (_reorderQueue.size() && _reorderQueue.size() > _reorderQueue.front()->reorderSize) { + auto *frame = _reorderQueue.front()->take(); + _reorderQueue.pop_front(); + return frame; + } + return nil; +} + +RTC_OBJC_TYPE(RTCVideoFrame) * RTCVideoFrameReorderQueue::takeIfAny() { + webrtc::MutexLock lock(&_reorderQueueLock); + if (_reorderQueue.size()) { + auto *frame = _reorderQueue.front()->take(); + _reorderQueue.pop_front(); + return frame; + } + return nil; +} + +} // namespace webrtc \ No newline at end of file diff --git a/sdk/objc/components/video_codec/nalu_rewriter.cc b/sdk/objc/components/video_codec/nalu_rewriter.cc index 1d5a34758f..73fcd5e3b3 100644 --- a/sdk/objc/components/video_codec/nalu_rewriter.cc +++ b/sdk/objc/components/video_codec/nalu_rewriter.cc @@ -18,6 +18,7 @@ #include "rtc_base/checks.h" #include "rtc_base/logging.h" +#include "common_video/h264/sps_parser.h" namespace webrtc { @@ -164,8 +165,8 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, CMBlockBufferRef block_buffer = nullptr; CFAllocatorRef block_allocator = CMMemoryPoolGetAllocator(memory_pool); OSStatus status = CMBlockBufferCreateWithMemoryBlock( - kCFAllocatorDefault, nullptr, reader.BytesRemaining(), block_allocator, - nullptr, 0, reader.BytesRemaining(), kCMBlockBufferAssureMemoryNowFlag, + kCFAllocatorDefault, nullptr, reader.BytesRemainingForAVC(), block_allocator, + nullptr, 0, reader.BytesRemainingForAVC(), kCMBlockBufferAssureMemoryNowFlag, &block_buffer); if (status != kCMBlockBufferNoErr) { RTC_LOG(LS_ERROR) << "Failed to create block buffer."; @@ -199,7 +200,7 @@ bool 
H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, CFRelease(contiguous_buffer); return false; } - RTC_DCHECK(block_buffer_size == reader.BytesRemaining()); + RTC_DCHECK(block_buffer_size == reader.BytesRemainingForAVC()); // Write Avcc NALUs into block buffer memory. AvccBufferWriter writer(reinterpret_cast(data_ptr), @@ -225,6 +226,217 @@ bool H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, return true; } +bool H265CMSampleBufferToAnnexBBuffer( + CMSampleBufferRef hvcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer) { + RTC_DCHECK(hvcc_sample_buffer); + + // Get format description from the sample buffer. + CMVideoFormatDescriptionRef description = + CMSampleBufferGetFormatDescription(hvcc_sample_buffer); + if (description == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to get sample buffer's description."; + return false; + } + + // Get parameter set information. + int nalu_header_size = 0; + size_t param_set_count = 0; + OSStatus status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( + description, 0, nullptr, nullptr, &param_set_count, &nalu_header_size); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get parameter set."; + return false; + } + RTC_CHECK_EQ(nalu_header_size, kAvccHeaderByteSize); + RTC_DCHECK_EQ(param_set_count, 3); + + // Truncate any previous data in the buffer without changing its capacity. + annexb_buffer->SetSize(0); + + size_t nalu_offset = 0; + std::vector frag_offsets; + std::vector frag_lengths; + + // Place all parameter sets at the front of buffer. + if (is_keyframe) { + size_t param_set_size = 0; + const uint8_t* param_set = nullptr; + for (size_t i = 0; i < param_set_count; ++i) { + status = CMVideoFormatDescriptionGetHEVCParameterSetAtIndex( + description, i, &param_set, &param_set_size, nullptr, nullptr); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get parameter set."; + return false; + } + // Update buffer.
+ annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes)); + annexb_buffer->AppendData(reinterpret_cast(param_set), + param_set_size); + // Update fragmentation. + frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes)); + frag_lengths.push_back(param_set_size); + nalu_offset += sizeof(kAnnexBHeaderBytes) + param_set_size; + } + } + + // Get block buffer from the sample buffer. + CMBlockBufferRef block_buffer = + CMSampleBufferGetDataBuffer(hvcc_sample_buffer); + if (block_buffer == nullptr) { + RTC_LOG(LS_ERROR) << "Failed to get sample buffer's block buffer."; + return false; + } + CMBlockBufferRef contiguous_buffer = nullptr; + // Make sure block buffer is contiguous. + if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) { + status = CMBlockBufferCreateContiguous( + nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: " + << status; + return false; + } + } else { + contiguous_buffer = block_buffer; + // Retain to make cleanup easier. + CFRetain(contiguous_buffer); + block_buffer = nullptr; + } + + // Now copy the actual data. + char* data_ptr = nullptr; + size_t block_buffer_size = CMBlockBufferGetDataLength(contiguous_buffer); + status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, nullptr, + &data_ptr); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to get block buffer data."; + CFRelease(contiguous_buffer); + return false; + } + size_t bytes_remaining = block_buffer_size; + while (bytes_remaining > 0) { + // The size type here must match |nalu_header_size|, we expect 4 bytes. + // Read the length of the next packet of data. Must convert from big endian + // to host endian. + RTC_DCHECK_GE(bytes_remaining, (size_t)nalu_header_size); + uint32_t* uint32_data_ptr = reinterpret_cast(data_ptr); + uint32_t packet_size = CFSwapInt32BigToHost(*uint32_data_ptr); + // Update buffer. 
+ annexb_buffer->AppendData(kAnnexBHeaderBytes, sizeof(kAnnexBHeaderBytes)); + annexb_buffer->AppendData(data_ptr + nalu_header_size, packet_size); + // Update fragmentation. + frag_offsets.push_back(nalu_offset + sizeof(kAnnexBHeaderBytes)); + frag_lengths.push_back(packet_size); + nalu_offset += sizeof(kAnnexBHeaderBytes) + packet_size; + + size_t bytes_written = packet_size + sizeof(kAnnexBHeaderBytes); + bytes_remaining -= bytes_written; + data_ptr += bytes_written; + } + RTC_DCHECK_EQ(bytes_remaining, (size_t)0); + + CFRelease(contiguous_buffer); + + return true; +} + +bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, + size_t annexb_buffer_size, + CMVideoFormatDescriptionRef video_format, + CMSampleBufferRef* out_sample_buffer) { + RTC_DCHECK(annexb_buffer); + RTC_DCHECK(out_sample_buffer); + RTC_DCHECK(video_format); + *out_sample_buffer = nullptr; + + AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size); + if (reader.SeekToNextNaluOfType(H265::kVps)) { + // Buffer contains a VPS NALU - skip it and the following SPS and PPS + const uint8_t* data; + size_t data_len; + if (!reader.ReadNalu(&data, &data_len)) { + RTC_LOG(LS_ERROR) << "Failed to read VPS"; + return false; + } + if (!reader.ReadNalu(&data, &data_len)) { + RTC_LOG(LS_ERROR) << "Failed to read SPS"; + return false; + } + if (!reader.ReadNalu(&data, &data_len)) { + RTC_LOG(LS_ERROR) << "Failed to read PPS"; + return false; + } + } else { + // No VPS NALU - start reading from the first NALU in the buffer + reader.SeekToStart(); + } + + // Allocate memory as a block buffer. + // TODO(tkchin): figure out how to use a pool.
+ CMBlockBufferRef block_buffer = nullptr; + OSStatus status = CMBlockBufferCreateWithMemoryBlock( + nullptr, nullptr, reader.BytesRemainingForAVC(), nullptr, nullptr, 0, + reader.BytesRemainingForAVC(), kCMBlockBufferAssureMemoryNowFlag, + &block_buffer); + if (status != kCMBlockBufferNoErr) { + RTC_LOG(LS_ERROR) << "Failed to create block buffer."; + return false; + } + + // Make sure block buffer is contiguous. + CMBlockBufferRef contiguous_buffer = nullptr; + if (!CMBlockBufferIsRangeContiguous(block_buffer, 0, 0)) { + status = CMBlockBufferCreateContiguous( + nullptr, block_buffer, nullptr, nullptr, 0, 0, 0, &contiguous_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to flatten non-contiguous block buffer: " + << status; + CFRelease(block_buffer); + return false; + } + } else { + contiguous_buffer = block_buffer; + block_buffer = nullptr; + } + + // Get a raw pointer into allocated memory. + size_t block_buffer_size = 0; + char* data_ptr = nullptr; + status = CMBlockBufferGetDataPointer(contiguous_buffer, 0, nullptr, + &block_buffer_size, &data_ptr); + if (status != kCMBlockBufferNoErr) { + RTC_LOG(LS_ERROR) << "Failed to get block buffer data pointer."; + CFRelease(contiguous_buffer); + return false; + } + RTC_DCHECK(block_buffer_size == reader.BytesRemainingForAVC()); + + // Write Hvcc NALUs into block buffer memory. + AvccBufferWriter writer(reinterpret_cast(data_ptr), + block_buffer_size); + while (reader.BytesRemaining() > 0) { + const uint8_t* nalu_data_ptr = nullptr; + size_t nalu_data_size = 0; + if (reader.ReadNalu(&nalu_data_ptr, &nalu_data_size)) { + writer.WriteNalu(nalu_data_ptr, nalu_data_size); + } + } + + // Create sample buffer. 
+ status = CMSampleBufferCreate(nullptr, contiguous_buffer, true, nullptr, + nullptr, video_format, 1, 0, nullptr, 0, + nullptr, out_sample_buffer); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create sample buffer."; + CFRelease(contiguous_buffer); + return false; + } + CFRelease(contiguous_buffer); + return true; +} + CMVideoFormatDescriptionRef CreateVideoFormatDescription( const uint8_t* annexb_buffer, size_t annexb_buffer_size) { @@ -255,6 +467,333 @@ CMVideoFormatDescriptionRef CreateVideoFormatDescription( return description; } +class SpsAndVuiParser : private SpsParser { +public: + struct State : SpsState { + explicit State(const SpsState& spsState) + : SpsState(spsState) + { + } + + uint8_t profile_idc { 0 }; + uint8_t level_idc { 0 }; + bool constraint_set3_flag { false }; + bool bitstream_restriction_flag { false }; + uint64_t max_num_reorder_frames { 0 }; + }; + static std::optional Parse(const std::vector& unpacked_buffer) + { + BitstreamReader reader(unpacked_buffer); + auto spsState = ParseSpsUpToVui(reader); + if (!spsState) { + return { }; + } + State result { *spsState }; + + { + // We are restarting parsing for some values we need and that ParseSpsUpToVui is not giving us. + BitstreamReader reader2(unpacked_buffer); + result.profile_idc = reader2.Read(); + // constraint_set0_flag, constraint_set1_flag, constraint_set2_flag + reader2.ConsumeBits(3); + result.constraint_set3_flag = reader2.Read(); + // constraint_set4_flag, constraint_set5_flag and reserved bits (2) + reader2.ConsumeBits(4); + result.level_idc = reader2.Read(); + if (!reader2.Ok()) { + return { }; + } + } + + if (!spsState->vui_params_present) { + return result; + } + // Based on ANNEX VUI parameters syntax. 
+ + // aspect_ratio_info_present_flag + if (reader.Read()) { + // aspect_ratio_idc + auto aspect_ratio_idc = reader.Read(); + // FIXME Extended_SAR + constexpr uint64_t extendedSar = 255; + if (aspect_ratio_idc == extendedSar) { + // sar_width + reader.ConsumeBits(16); + // sar_height + reader.ConsumeBits(16); + } + } + // overscan_info_present_flag + if (reader.Read()) { + // overscan_appropriate_flag + reader.ConsumeBits(1); + } + // video_signal_type_present_flag + if (reader.Read()) { + // video_format + reader.ConsumeBits(3); + // video_full_range_flag + reader.ConsumeBits(1); + // colour_description_present_flag + if (reader.Read()) { + // colour_primaries + reader.ConsumeBits(8); + // transfer_characteristics + reader.ConsumeBits(8); + // matrix_coefficients + reader.ConsumeBits(8); + } + } + // chroma_loc_info_present_flag + if (reader.Read()) { + // chroma_sample_loc_type_top_field + reader.ReadExponentialGolomb(); + // chroma_sample_loc_type_bottom_field + reader.ReadExponentialGolomb(); + } + // timing_info_present_flag + if (reader.Read()) { + // num_units_in_tick + reader.ConsumeBits(32); + // time_scale + reader.ConsumeBits(32); + // fixed_frame_rate_flag + reader.ConsumeBits(1); + } + // nal_hrd_parameters_present_flag + bool nal_hrd_parameters_present_flag = reader.Read(); + if (nal_hrd_parameters_present_flag) { + // hrd_parameters + skipHRDParameters(reader); + } + // vcl_hrd_parameters_present_flag + bool vcl_hrd_parameters_present_flag = reader.Read(); + if (vcl_hrd_parameters_present_flag) { + // hrd_parameters + skipHRDParameters(reader); + } + if (nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag) { + // low_delay_hrd_flag + reader.ConsumeBits(1); + } + // pic_struct_present_flag + reader.ConsumeBits(1); + // bitstream_restriction_flag + result.bitstream_restriction_flag = reader.Read(); + if (result.bitstream_restriction_flag) { + // motion_vectors_over_pic_boundaries_flag + reader.ConsumeBits(1); + // 
max_bytes_per_pic_denom + reader.ReadExponentialGolomb(); + // max_bits_per_mb_denom + reader.ReadExponentialGolomb(); + // log2_max_mv_length_horizontal + reader.ReadExponentialGolomb(); + // log2_max_mv_length_vertical + reader.ReadExponentialGolomb(); + // max_num_reorder_frames + result.max_num_reorder_frames = reader.ReadExponentialGolomb(); + // max_dec_frame_buffering + reader.ReadExponentialGolomb(); + } + + if (!reader.Ok()) { + return { }; + } + return result; + } + + static void skipHRDParameters(BitstreamReader& reader) + { + // cpb_cnt_minus1 + auto cpb_cnt_minus1 = reader.ReadExponentialGolomb(); + // bit_rate_scale + // cpb_size_scale + reader.ConsumeBits(8); + for (size_t cptr = 0; cptr <= cpb_cnt_minus1; ++cptr) { + // bit_rate_value_minus1 + reader.ReadExponentialGolomb(); + // cpb_size_value_minus1 + reader.ReadExponentialGolomb(); + // cbr_flag + reader.ConsumeBits(1); + } + // initial_cpb_removal_delay_length_minus1 + // cpb_removal_delay_length_minus1 + // dpb_output_delay_length_minus1 + // time_offset_length + reader.ConsumeBits(20); + } +}; + +// Table A-1 of H.264 spec +static size_t maxDpbMbsFromLevelNumber(uint8_t profile_idc, uint8_t level_idc, bool constraint_set3_flag) +{ + if ((profile_idc == 66 || profile_idc == 77) && level_idc == 11 && constraint_set3_flag) { + // level1b + return 396; + } + H264Level level_casted = static_cast(level_idc); + + switch (level_casted) { + case H264Level::kLevel1: + return 396; + case H264Level::kLevel1_1: + return 900; + case H264Level::kLevel1_2: + case H264Level::kLevel1_3: + case H264Level::kLevel2: + return 2376; + case H264Level::kLevel2_1: + return 4752; + case H264Level::kLevel2_2: + case H264Level::kLevel3: + return 8100; + case H264Level::kLevel3_1: + return 18000; + case H264Level::kLevel3_2: + return 20480; + case H264Level::kLevel4: + return 32768; + case H264Level::kLevel4_1: + return 32768; + case H264Level::kLevel4_2: + return 34816; + case H264Level::kLevel5: + return 110400; + case 
H264Level::kLevel5_1: + return 184320; + case H264Level::kLevel5_2: + return 184320; + default: + RTC_LOG(LS_ERROR) << "Wrong maxDpbMbsFromLevelNumber"; + return 0; + } +} + +static uint8_t ComputeH264ReorderSizeFromSPS(const SpsAndVuiParser::State& state) { + if (state.pic_order_cnt_type == 2) { + return 0; + } + + uint64_t max_dpb_mbs = maxDpbMbsFromLevelNumber(state.profile_idc, state.level_idc, state.constraint_set3_flag); + uint64_t pic_width_in_mbs = state.pic_width_in_mbs_minus1 + 1; + uint64_t frame_height_in_mbs = (2 - state.frame_mbs_only_flag) * (state.pic_height_in_map_units_minus1 + 1); + uint64_t max_dpb_frames_from_sps = max_dpb_mbs / (pic_width_in_mbs * frame_height_in_mbs); + // We use a max value of 16. + auto max_dpb_frames = static_cast(std::min(max_dpb_frames_from_sps, 16ull)); + + if (state.bitstream_restriction_flag) { + if (state.max_num_reorder_frames < max_dpb_frames) { + return static_cast(state.max_num_reorder_frames); + } else { + return max_dpb_frames; + } + } + if (state.constraint_set3_flag && (state.profile_idc == 44 || state.profile_idc == 86 || state.profile_idc == 100 || state.profile_idc == 110 || state.profile_idc == 122 || state.profile_idc == 244)) { + return 0; + } + return max_dpb_frames; +} + +uint8_t ComputeH264ReorderSizeFromAnnexB(const uint8_t* annexb_buffer, size_t annexb_buffer_size) { + AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size); + if (!reader.SeekToNextNaluOfType(kSps)) { + return 0; + } + const uint8_t* spsData; + size_t spsDataSize; + + if (!reader.ReadNalu(&spsData, &spsDataSize) || spsDataSize <= H264::kNaluTypeSize) { + return 0; + } + + std::vector unpacked_buffer = H264::ParseRbsp(spsData + H264::kNaluTypeSize, spsDataSize - H264::kNaluTypeSize); + auto spsAndVui = SpsAndVuiParser::Parse(unpacked_buffer); + if (!spsAndVui) { + RTC_LOG(LS_ERROR) << "Failed to parse sps."; + return 0; + } + + return ComputeH264ReorderSizeFromSPS(*spsAndVui); +} + +uint8_t ComputeH264ReorderSizeFromAVC(const 
uint8_t* avcData, size_t avcDataSize) { + std::vector unpacked_buffer { avcData, avcData + avcDataSize }; + BitstreamReader reader(unpacked_buffer); + + // configurationVersion + reader.ConsumeBits(8); + // AVCProfileIndication; + reader.ConsumeBits(8); + // profile_compatibility; + reader.ConsumeBits(8); + // AVCLevelIndication; + reader.ConsumeBits(8); + // bit(6) reserved = '111111'b; + // unsigned int(2) lengthSizeMinusOne; + reader.ConsumeBits(8); + // bit(3) reserved = '111'b; + // unsigned int(5) numOfSequenceParameterSets; + auto numOfSequenceParameterSets = 0x1F & reader.Read(); + + if (!reader.Ok()) { + return 0; + } + + size_t offset = 6; + if (numOfSequenceParameterSets) { + auto size = reader.Read(); + offset += 2; + + reader.ConsumeBits(8 * (size + H264::kNaluTypeSize)); + if (!reader.Ok()) { + return 0; + } + + auto spsAndVui = SpsAndVuiParser::Parse({ avcData + offset + H264::kNaluTypeSize, avcData + offset + size }); + if (spsAndVui) { + return ComputeH264ReorderSizeFromSPS(*spsAndVui); + } + } + return 0; +} + +CMVideoFormatDescriptionRef CreateH265VideoFormatDescription( + const uint8_t* annexb_buffer, + size_t annexb_buffer_size) { + const uint8_t* param_set_ptrs[3] = {}; + size_t param_set_sizes[3] = {}; + AnnexBBufferReader reader(annexb_buffer, annexb_buffer_size); + // Skip everything before the VPS, then read the VPS, SPS and PPS + if (!reader.SeekToNextNaluOfType(H265::kVps)) { + return nullptr; + } + if (!reader.ReadNalu(&param_set_ptrs[0], &param_set_sizes[0])) { + RTC_LOG(LS_ERROR) << "Failed to read VPS"; + return nullptr; + } + if (!reader.ReadNalu(&param_set_ptrs[1], &param_set_sizes[1])) { + RTC_LOG(LS_ERROR) << "Failed to read SPS"; + return nullptr; + } + if (!reader.ReadNalu(&param_set_ptrs[2], &param_set_sizes[2])) { + RTC_LOG(LS_ERROR) << "Failed to read PPS"; + return nullptr; + } + + // Parse the VPS, SPS and PPS into a CMVideoFormatDescription.
+ CMVideoFormatDescriptionRef description = nullptr; + OSStatus status = CMVideoFormatDescriptionCreateFromHEVCParameterSets( + kCFAllocatorDefault, 3, param_set_ptrs, param_set_sizes, 4, nullptr, + &description); + if (status != noErr) { + RTC_LOG(LS_ERROR) << "Failed to create video format description."; + return nullptr; + } + return description; +} + AnnexBBufferReader::AnnexBBufferReader(const uint8_t* annexb_buffer, size_t length) : start_(annexb_buffer), length_(length) { @@ -289,6 +828,19 @@ size_t AnnexBBufferReader::BytesRemaining() const { return length_ - offset_->start_offset; } +size_t AnnexBBufferReader::BytesRemainingForAVC() const { + if (offset_ == offsets_.end()) { + return 0; + } + auto iterator = offset_; + size_t size = 0; + while (iterator != offsets_.end()) { + size += kAvccHeaderByteSize + iterator->payload_size; + iterator++; + } + return size; +} + void AnnexBBufferReader::SeekToStart() { offset_ = offsets_.begin(); } @@ -302,6 +854,17 @@ bool AnnexBBufferReader::SeekToNextNaluOfType(NaluType type) { } return false; } + +bool AnnexBBufferReader::SeekToNextNaluOfType(H265::NaluType type) { + for (; offset_ != offsets_.end(); ++offset_) { + if (offset_->payload_size < 1) + continue; + if (H265::ParseNaluType(*(start_ + offset_->payload_start_offset)) == type) + return true; + } + return false; +} + AvccBufferWriter::AvccBufferWriter(uint8_t* const avcc_buffer, size_t length) : start_(avcc_buffer), offset_(0), length_(length) { RTC_DCHECK(avcc_buffer); diff --git a/sdk/objc/components/video_codec/nalu_rewriter.h b/sdk/objc/components/video_codec/nalu_rewriter.h index fb1d98ce8d..440cd4b7c9 100644 --- a/sdk/objc/components/video_codec/nalu_rewriter.h +++ b/sdk/objc/components/video_codec/nalu_rewriter.h @@ -17,6 +17,7 @@ #include #include "common_video/h264/h264_common.h" +#include "common_video/h265/h265_common.h" #include "modules/video_coding/codecs/h264/include/h264.h" #include "rtc_base/buffer.h" @@ -43,6 +44,32 @@ bool 
H264AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, CMSampleBufferRef* out_sample_buffer, CMMemoryPoolRef memory_pool); +uint8_t ComputeH264ReorderSizeFromAnnexB(const uint8_t* annexb_buffer, size_t annexb_buffer_size); +uint8_t ComputeH264ReorderSizeFromAVC(const uint8_t* avcdata, size_t avcdata_size); + +// Converts a sample buffer emitted from the VideoToolbox encoder into a buffer +// suitable for RTP. The sample buffer is in hvcc format whereas the rtp buffer +// needs to be in Annex B format. Data is written directly to |annexb_buffer|. +bool H265CMSampleBufferToAnnexBBuffer(CMSampleBufferRef avcc_sample_buffer, + bool is_keyframe, + rtc::Buffer* annexb_buffer); + +// Converts a buffer received from RTP into a sample buffer suitable for the +// VideoToolbox decoder. The RTP buffer is in annex b format whereas the sample +// buffer is in hvcc format. +// |video_format| must always be provided; any parameter set NALUs (VPS/SPS/PPS) +// at the start of the buffer are skipped before conversion. +// Caller is responsible for releasing the created sample buffer. +bool H265AnnexBBufferToCMSampleBuffer(const uint8_t* annexb_buffer, + size_t annexb_buffer_size, + CMVideoFormatDescriptionRef video_format, + CMSampleBufferRef* out_sample_buffer) + __OSX_AVAILABLE_STARTING(__MAC_10_12, __IPHONE_11_0); + +CMVideoFormatDescriptionRef CreateH265VideoFormatDescription( + const uint8_t* annexb_buffer, + size_t annexb_buffer_size); + // Returns a video format description created from the sps/pps information in // the Annex B buffer. If there is no such information, nullptr is returned. // The caller is responsible for releasing the description. @@ -65,6 +92,7 @@ class AnnexBBufferReader final { // Returns the number of unread NALU bytes, including the size of the header. // If the buffer has no remaining NALUs this will return zero.
size_t BytesRemaining() const; + size_t BytesRemainingForAVC() const; // Reset the reader to start reading from the first NALU void SeekToStart(); @@ -74,6 +102,7 @@ class AnnexBBufferReader final { // Return true if a NALU of the desired type is found, false if we // reached the end instead bool SeekToNextNaluOfType(H264::NaluType type); + bool SeekToNextNaluOfType(H265::NaluType type); private: // Returns the the next offset that contains NALU data. diff --git a/sdk/objc/native/src/objc_video_encoder_factory.mm b/sdk/objc/native/src/objc_video_encoder_factory.mm index 1febb40d62..85284777d5 100644 --- a/sdk/objc/native/src/objc_video_encoder_factory.mm +++ b/sdk/objc/native/src/objc_video_encoder_factory.mm @@ -16,6 +16,7 @@ #import "base/RTCVideoEncoder.h" #import "base/RTCVideoEncoderFactory.h" #import "components/video_codec/RTCCodecSpecificInfoH264+Private.h" +#import "components/video_codec/RTCCodecSpecificInfoH265+Private.h" #import "sdk/objc/api/peerconnection/RTCEncodedImage+Private.h" #import "sdk/objc/api/peerconnection/RTCVideoCodecInfo+Private.h" #import "sdk/objc/api/peerconnection/RTCVideoEncoderSettings+Private.h" @@ -67,6 +68,8 @@ int32_t RegisterEncodeCompleteCallback( class]]) { codecSpecificInfo = [(RTC_OBJC_TYPE( RTCCodecSpecificInfoH264) *)info nativeCodecSpecificInfo]; + } else if ([info isKindOfClass:[RTC_OBJC_TYPE(RTCCodecSpecificInfoH265) class]]) { + codecSpecificInfo = [(RTC_OBJC_TYPE(RTCCodecSpecificInfoH265) *)info nativeCodecSpecificInfo]; } EncodedImageCallback::Result res = diff --git a/tools_webrtc/libs/generate_licenses.py b/tools_webrtc/libs/generate_licenses.py index 73fd19ae86..79593ce602 100755 --- a/tools_webrtc/libs/generate_licenses.py +++ b/tools_webrtc/libs/generate_licenses.py @@ -49,6 +49,7 @@ 'third_party/android_deps/libs/' 'com_google_errorprone_error_prone_core/LICENSE' ], + 'ffmpeg': ['third_party/ffmpeg/LICENSE.md'], 'fiat': ['third_party/boringssl/src/third_party/fiat/LICENSE'], 'guava': 
['third_party/android_deps/libs/com_google_guava_guava/LICENSE'], 'ijar': ['third_party/ijar/LICENSE'], @@ -63,6 +64,7 @@ 'libyuv': ['third_party/libyuv/LICENSE'], 'llvm-libc': ['third_party/llvm-libc/src/LICENSE.TXT'], 'nasm': ['third_party/nasm/LICENSE'], + 'openh264': ['third_party/openh264/src/LICENSE'], 'opus': ['third_party/opus/src/COPYING'], 'pffft': ['third_party/pffft/LICENSE'], 'protobuf': ['third_party/protobuf/LICENSE'],