From 23f3df92c4f7c2d166620a9e55a8c5c0f6cf5aef Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Tue, 26 Aug 2025 20:45:05 +0000
Subject: [PATCH 1/3] Use cuda filters to support 10-bit videos

For: #776

Signed-off-by: Dmitry Rogozhkin
---
 src/torchcodec/_core/CudaDeviceInterface.cpp | 62 ++++++++++++++++++++
 src/torchcodec/_core/CudaDeviceInterface.h   |  5 ++
 src/torchcodec/_core/DeviceInterface.h       | 13 ++++
 src/torchcodec/_core/FilterGraph.cpp         |  6 +-
 src/torchcodec/_core/FilterGraph.h           |  3 +-
 src/torchcodec/_core/SingleStreamDecoder.cpp | 36 ++++++++++++
 src/torchcodec/_core/SingleStreamDecoder.h   |  4 ++
 test/test_decoders.py                        | 23 ++------
 8 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index 74b556ed0..375a9bc20 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -199,6 +199,68 @@ void CudaDeviceInterface::initializeContext(AVCodecContext* codecContext) {
   return;
 }
 
+std::unique_ptr<FiltersContext> CudaDeviceInterface::initializeFiltersContext(
+    const VideoStreamOptions& videoStreamOptions,
+    const UniqueAVFrame& avFrame,
+    const AVRational& timeBase) {
+  enum AVPixelFormat frameFormat =
+      static_cast<enum AVPixelFormat>(avFrame->format);
+
+  if (avFrame->format != AV_PIX_FMT_CUDA) {
+    auto cpuDevice = torch::Device(torch::kCPU);
+    auto cpuInterface = createDeviceInterface(cpuDevice);
+    return cpuInterface->initializeFiltersContext(
+        videoStreamOptions, avFrame, timeBase);
+  }
+
+  auto frameDims =
+      getHeightAndWidthFromOptionsOrAVFrame(videoStreamOptions, avFrame);
+  int height = frameDims.height;
+  int width = frameDims.width;
+
+  auto hwFramesCtx =
+      reinterpret_cast<AVHWFramesContext*>(avFrame->hw_frames_ctx->data);
+  AVPixelFormat actualFormat = hwFramesCtx->sw_format;
+
+  if (actualFormat == AV_PIX_FMT_NV12) {
+    return nullptr;
+  }
+
+  AVPixelFormat outputFormat;
+  std::stringstream filters;
+
+  unsigned version_int = avfilter_version();
+  if (version_int < AV_VERSION_INT(8, 0, 103)) {
+    // Color conversion support ('format=' option) was added to scale_cuda in
+    // n5.0. With earlier versions of FFmpeg we have no choice but to use CPU
+    // filters.
+    // See:
+    // https://github.com/FFmpeg/FFmpeg/commit/62dc5df941f5e196164c151691e4274195523e95
+    outputFormat = AV_PIX_FMT_RGB24;
+
+    filters << "hwdownload,format=" << av_pix_fmt_desc_get(actualFormat)->name;
+    filters << ",scale=" << width << ":" << height;
+    filters << ":sws_flags=bilinear";
+  } else {
+    // The actual output color format will be set via the filter options.
+    outputFormat = AV_PIX_FMT_CUDA;
+
+    filters << "scale_cuda=" << width << ":" << height;
+    filters << ":format=nv12:interp_algo=bilinear";
+  }
+
+  return std::make_unique<FiltersContext>(
+      avFrame->width,
+      avFrame->height,
+      frameFormat,
+      avFrame->sample_aspect_ratio,
+      width,
+      height,
+      outputFormat,
+      filters.str(),
+      timeBase,
+      av_buffer_ref(avFrame->hw_frames_ctx));
+}
+
 void CudaDeviceInterface::convertAVFrameToFrameOutput(
     const VideoStreamOptions& videoStreamOptions,
     [[maybe_unused]] const AVRational& timeBase,
diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h
index f29caff42..b49908443 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.h
+++ b/src/torchcodec/_core/CudaDeviceInterface.h
@@ -21,6 +21,11 @@ class CudaDeviceInterface : public DeviceInterface {
 
   void initializeContext(AVCodecContext* codecContext) override;
 
+  std::unique_ptr<FiltersContext> initializeFiltersContext(
+      const VideoStreamOptions& videoStreamOptions,
+      const UniqueAVFrame& avFrame,
+      const AVRational& timeBase) override;
+
   void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
       const AVRational& timeBase,
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
index 9a7288eb0..7916c81b2 100644
--- a/src/torchcodec/_core/DeviceInterface.h
+++ b/src/torchcodec/_core/DeviceInterface.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
 #include "FFMPEGCommon.h"
+#include "src/torchcodec/_core/FilterGraph.h"
 #include "src/torchcodec/_core/Frame.h"
 #include "src/torchcodec/_core/StreamOptions.h"
 
@@ -33,6 +34,18 @@ class DeviceInterface {
   // support CUDA and others only support CPU.
   virtual void initializeContext(AVCodecContext* codecContext) = 0;
 
+  // Returns a FiltersContext if the device interface can't handle conversion
+  // of the frame on its own within a call to convertAVFrameToFrameOutput().
+  // The FiltersContext contains the input and output initialization
+  // parameters describing the required conversion. Its output can then be
+  // passed to convertAVFrameToFrameOutput() to generate the output tensor.
+  virtual std::unique_ptr<FiltersContext> initializeFiltersContext(
+      [[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
+      [[maybe_unused]] const UniqueAVFrame& avFrame,
+      [[maybe_unused]] const AVRational& timeBase) {
+    return nullptr;
+  };
+
   virtual void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
       const AVRational& timeBase,
diff --git a/src/torchcodec/_core/FilterGraph.cpp b/src/torchcodec/_core/FilterGraph.cpp
index 43a12f092..c22875915 100644
--- a/src/torchcodec/_core/FilterGraph.cpp
+++ b/src/torchcodec/_core/FilterGraph.cpp
@@ -22,7 +22,8 @@ FiltersContext::FiltersContext(
     int outputHeight,
     AVPixelFormat outputFormat,
     const std::string& filtergraphStr,
-    AVRational timeBase)
+    AVRational timeBase,
+    AVBufferRef* hwFramesCtx)
     : inputWidth(inputWidth),
       inputHeight(inputHeight),
       inputFormat(inputFormat),
@@ -31,7 +32,8 @@ FiltersContext::FiltersContext(
       outputHeight(outputHeight),
       outputFormat(outputFormat),
       filtergraphStr(filtergraphStr),
-      timeBase(timeBase) {}
+      timeBase(timeBase),
+      hwFramesCtx(hwFramesCtx) {}
 
 bool operator==(const AVRational& lhs, const AVRational& rhs) {
   return lhs.num == rhs.num && lhs.den == rhs.den;
diff --git a/src/torchcodec/_core/FilterGraph.h b/src/torchcodec/_core/FilterGraph.h
index 4edff6c1b..8cba571bd 100644
--- a/src/torchcodec/_core/FilterGraph.h
+++ b/src/torchcodec/_core/FilterGraph.h
@@ -35,7 +35,8 @@ struct FiltersContext {
       int outputHeight,
       AVPixelFormat outputFormat,
       const std::string& filtergraphStr,
-      AVRational timeBase);
+      AVRational timeBase,
+      AVBufferRef* hwFramesCtx = nullptr);
 
   bool operator==(const FiltersContext&) const;
   bool operator!=(const FiltersContext&) const;
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 864e82d0a..01d9f3cf9 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1251,6 +1251,42 @@ FrameOutput SingleStreamDecoder::convertAVFrameToFrameOutput(
       deviceInterface_ != nullptr,
       "No device interface available for video decoding. This ",
       "shouldn't happen, please report.");
+
+  std::unique_ptr<FiltersContext> newFiltersContext =
+      deviceInterface_->initializeFiltersContext(
+          streamInfo.videoStreamOptions, avFrame, streamInfo.timeBase);
+  // The device interface may return nullptr for the filters context, in
+  // which case it will handle the conversion directly in
+  // convertAVFrameToFrameOutput().
+  if (newFiltersContext) {
+    // We need to compare the current filters context with our previous one.
+    // If they are different, we need to re-create the filter graph. We
+    // create the filter graph late so that we don't have to depend on the
+    // unreliable metadata in the header, and we sometimes re-create it
+    // because the frame resolution can change mid-stream. Finally, we want
+    // to reuse the filter graph as much as possible for performance reasons.
+    if (!filterGraph_ || !filtersContext_ ||
+        *filtersContext_ != *newFiltersContext) {
+      filterGraph_ = std::make_unique<FilterGraph>(
+          *newFiltersContext, streamInfo.videoStreamOptions);
+      filtersContext_ = std::move(newFiltersContext);
+    }
+    avFrame = filterGraph_->convert(avFrame);
+
+    // If this check fails, it means the frame wasn't reshaped to its
+    // expected dimensions by the filter graph.
+    TORCH_CHECK(
+        (avFrame->width == filtersContext_->outputWidth) &&
+            (avFrame->height == filtersContext_->outputHeight),
+        "Expected frame from filter graph of ",
+        filtersContext_->outputWidth,
+        "x",
+        filtersContext_->outputHeight,
+        ", got ",
+        avFrame->width,
+        "x",
+        avFrame->height);
+  }
   deviceInterface_->convertAVFrameToFrameOutput(
       streamInfo.videoStreamOptions,
       streamInfo.timeBase,
diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
index 027f52fc4..98fbf6cf8 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.h
+++ b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -351,6 +351,10 @@ class SingleStreamDecoder {
   SeekMode seekMode_;
   ContainerMetadata containerMetadata_;
   UniqueDecodingAVFormatContext formatContext_;
+  // Current filters context. Used to know whether a new FilterGraph should
+  // be created to process the next frame.
+  std::unique_ptr<FiltersContext> filtersContext_;
+  std::unique_ptr<FilterGraph> filterGraph_;
   std::unique_ptr<DeviceInterface> deviceInterface_;
   std::map<int, StreamInfo> streamInfos_;
   const int NO_ACTIVE_STREAM = -2;
diff --git a/test/test_decoders.py b/test/test_decoders.py
index 72b586891..d0525b741 100644
--- a/test/test_decoders.py
+++ b/test/test_decoders.py
@@ -1225,22 +1225,6 @@ def test_full_and_studio_range_bt709_video(self, asset):
         elif cuda_version_used_for_building_torch() == (12, 8):
             assert psnr(gpu_frame, cpu_frame) > 20
 
-    @needs_cuda
-    def test_10bit_videos_cuda(self):
-        # Assert that we raise proper error on different kinds of 10bit videos.
-
-        # TODO we should investigate how to support 10bit videos on GPU.
-        # See https://github.com/pytorch/torchcodec/issues/776
-
-        asset = H265_10BITS
-
-        decoder = VideoDecoder(asset.path, device="cuda")
-        with pytest.raises(
-            RuntimeError,
-            match="The AVFrame is p010le, but we expected AV_PIX_FMT_NV12.",
-        ):
-            decoder.get_frame_at(0)
-
     @needs_cuda
     def test_10bit_gpu_fallsback_to_cpu(self):
         # Test for 10-bit videos that aren't supported by NVDEC: we decode and
@@ -1272,12 +1256,13 @@ def test_10bit_gpu_fallsback_to_cpu(self):
             frames_cpu = decoder_cpu.get_frames_at(frame_indices).data
         assert_frames_equal(frames_gpu.cpu(), frames_cpu)
 
+    @pytest.mark.parametrize("device", all_supported_devices())
     @pytest.mark.parametrize("asset", (H264_10BITS, H265_10BITS))
-    def test_10bit_videos_cpu(self, asset):
-        # This just validates that we can decode 10-bit videos on CPU.
+    def test_10bit_videos(self, device, asset):
+        # This just validates that we can decode 10-bit videos.
         # TODO validate against the ref that the decoded frames are correct
-        decoder = VideoDecoder(asset.path)
+        decoder = VideoDecoder(asset.path, device=device)
         decoder.get_frame_at(10)
 
 
 def setup_frame_mappings(tmp_path, file, stream_index):
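Note on the scale_cuda version gate in patch 1: below is a minimal standalone sketch of the same branch logic, assuming only the FFmpeg development headers (buildCudaFilters is a hypothetical helper name, not part of the patch):

    extern "C" {
    #include <libavfilter/avfilter.h>
    #include <libavutil/version.h>
    }
    #include <sstream>
    #include <string>

    // Pick CPU or GPU filters based on the libavfilter runtime version,
    // mirroring the branches in CudaDeviceInterface::initializeFiltersContext().
    std::string buildCudaFilters(int width, int height, const std::string& swFormat) {
      std::stringstream filters;
      if (avfilter_version() < AV_VERSION_INT(8, 0, 103)) {
        // scale_cuda only gained a 'format=' option in FFmpeg n5.0, so
        // download the frame and convert on the CPU instead.
        filters << "hwdownload,format=" << swFormat;
        filters << ",scale=" << width << ":" << height << ":sws_flags=bilinear";
      } else {
        // Convert the 10-bit surface (e.g. p010le) to nv12 on the GPU.
        filters << "scale_cuda=" << width << ":" << height;
        filters << ":format=nv12:interp_algo=bilinear";
      }
      return filters.str();
    }

For a 1920x1080 p010le stream this yields "scale_cuda=1920:1080:format=nv12:interp_algo=bilinear" on FFmpeg n5.0 and newer, and "hwdownload,format=p010le,scale=1920:1080:sws_flags=bilinear" on older builds.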
From 745ed48df9cc8bb1c3de55df9459fbb35efa2d8c Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Thu, 28 Aug 2025 20:51:16 +0000
Subject: [PATCH 2/3] Implement initializeFiltersContext for CPU device
 interface

Signed-off-by: Dmitry Rogozhkin
---
 src/torchcodec/_core/CpuDeviceInterface.cpp | 147 ++++++++++----------
 src/torchcodec/_core/CpuDeviceInterface.h   |  15 +-
 2 files changed, 78 insertions(+), 84 deletions(-)

diff --git a/src/torchcodec/_core/CpuDeviceInterface.cpp b/src/torchcodec/_core/CpuDeviceInterface.cpp
index 77eaf3d09..1eaa3be15 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.cpp
+++ b/src/torchcodec/_core/CpuDeviceInterface.cpp
@@ -13,6 +13,34 @@ static bool g_cpu = registerDeviceInterface(
     torch::kCPU,
     [](const torch::Device& device) { return new CpuDeviceInterface(device); });
 
+ColorConversionLibrary getColorConversionLibrary(
+    const VideoStreamOptions& videoStreamOptions,
+    int width) {
+  // By default, we want to use swscale for color conversion because it is
+  // faster. However, it has width requirements, so we may need to fall back
+  // to filtergraph. We also need to respect what was requested from the
+  // options; we respect the options unconditionally, so it's possible for
+  // swscale's width requirements to be violated. We don't expose the ability
+  // to choose color conversion library publicly; we only use this ability
+  // internally.
+
+  // swscale requires widths to be multiples of 32:
+  // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
+  // so we fall back to filtergraph if the width is not a multiple of 32.
+  auto defaultLibrary = (width % 32 == 0)
+      ? ColorConversionLibrary::SWSCALE
+      : ColorConversionLibrary::FILTERGRAPH;
+
+  ColorConversionLibrary colorConversionLibrary =
+      videoStreamOptions.colorConversionLibrary.value_or(defaultLibrary);
+
+  TORCH_CHECK(
+      colorConversionLibrary == ColorConversionLibrary::SWSCALE ||
+          colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH,
+      "Invalid color conversion library: ",
+      static_cast<int>(colorConversionLibrary));
+  return colorConversionLibrary;
+}
+
 } // namespace
 
 CpuDeviceInterface::SwsFrameContext::SwsFrameContext(
@@ -46,6 +74,38 @@ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
       device_.type() == torch::kCPU, "Unsupported device: ", device_.str());
 }
 
+std::unique_ptr<FiltersContext> CpuDeviceInterface::initializeFiltersContext(
+    const VideoStreamOptions& videoStreamOptions,
+    const UniqueAVFrame& avFrame,
+    const AVRational& timeBase) {
+  enum AVPixelFormat frameFormat =
+      static_cast<enum AVPixelFormat>(avFrame->format);
+  auto frameDims =
+      getHeightAndWidthFromOptionsOrAVFrame(videoStreamOptions, avFrame);
+  int expectedOutputHeight = frameDims.height;
+  int expectedOutputWidth = frameDims.width;
+
+  if (getColorConversionLibrary(videoStreamOptions, expectedOutputWidth) ==
+      ColorConversionLibrary::SWSCALE) {
+    return nullptr;
+  }
+
+  std::stringstream filters;
+  filters << "scale=" << expectedOutputWidth << ":" << expectedOutputHeight;
+  filters << ":sws_flags=bilinear";
+
+  return std::make_unique<FiltersContext>(
+      avFrame->width,
+      avFrame->height,
+      frameFormat,
+      avFrame->sample_aspect_ratio,
+      expectedOutputWidth,
+      expectedOutputHeight,
+      AV_PIX_FMT_RGB24,
+      filters.str(),
+      timeBase);
+}
+
 // Note [preAllocatedOutputTensor with swscale and filtergraph]:
 // Callers may pass a pre-allocated tensor, where the output.data tensor will
 // be stored. This parameter is honored in any case, but it only leads to a
@@ -57,7 +117,7 @@ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
 // `dimension_order` parameter. It's up to callers to re-shape it if needed.
 void CpuDeviceInterface::convertAVFrameToFrameOutput(
     const VideoStreamOptions& videoStreamOptions,
-    const AVRational& timeBase,
+    [[maybe_unused]] const AVRational& timeBase,
     UniqueAVFrame& avFrame,
     FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -83,23 +143,8 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
   enum AVPixelFormat frameFormat =
       static_cast<enum AVPixelFormat>(avFrame->format);
 
-  // By default, we want to use swscale for color conversion because it is
-  // faster. However, it has width requirements, so we may need to fall back
-  // to filtergraph. We also need to respect what was requested from the
-  // options; we respect the options unconditionally, so it's possible for
-  // swscale's width requirements to be violated. We don't expose the ability
-  // to choose color conversion library publicly; we only use this ability
-  // internally.
-
-  // swscale requires widths to be multiples of 32:
-  // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
-  // so we fall back to filtergraph if the width is not a multiple of 32.
-  auto defaultLibrary = (expectedOutputWidth % 32 == 0)
-      ? ColorConversionLibrary::SWSCALE
-      : ColorConversionLibrary::FILTERGRAPH;
-
   ColorConversionLibrary colorConversionLibrary =
-      videoStreamOptions.colorConversionLibrary.value_or(defaultLibrary);
+      getColorConversionLibrary(videoStreamOptions, expectedOutputWidth);
 
   if (colorConversionLibrary == ColorConversionLibrary::SWSCALE) {
     // We need to compare the current frame context with our previous frame
@@ -137,42 +182,16 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
     frameOutput.data = outputTensor;
   } else if (colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
-    // See comment above in swscale branch about the filterGraphContext_
-    // creation.
-    std::stringstream filters;
-    filters << "scale=" << expectedOutputWidth << ":" << expectedOutputHeight;
-    filters << ":sws_flags=bilinear";
+    TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
 
-    FiltersContext filtersContext(
-        avFrame->width,
-        avFrame->height,
-        frameFormat,
-        avFrame->sample_aspect_ratio,
-        expectedOutputWidth,
-        expectedOutputHeight,
-        AV_PIX_FMT_RGB24,
-        filters.str(),
-        timeBase);
-
-    if (!filterGraphContext_ || prevFiltersContext_ != filtersContext) {
-      filterGraphContext_ =
-          std::make_unique<FilterGraph>(filtersContext, videoStreamOptions);
-      prevFiltersContext_ = std::move(filtersContext);
-    }
-    outputTensor = convertAVFrameToTensorUsingFilterGraph(avFrame);
-
-    // Similarly to above, if this check fails it means the frame wasn't
-    // reshaped to its expected dimensions by filtergraph.
-    auto shape = outputTensor.sizes();
-    TORCH_CHECK(
-        (shape.size() == 3) && (shape[0] == expectedOutputHeight) &&
-            (shape[1] == expectedOutputWidth) && (shape[2] == 3),
-        "Expected output tensor of shape ",
-        expectedOutputHeight,
-        "x",
-        expectedOutputWidth,
-        "x3, got ",
-        shape);
+    std::vector<int64_t> shape = {expectedOutputHeight, expectedOutputWidth, 3};
+    std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
+    AVFrame* avFramePtr = avFrame.release();
+    auto deleter = [avFramePtr](void*) {
+      UniqueAVFrame avFrameToDelete(avFramePtr);
+    };
+    outputTensor = torch::from_blob(
+        avFramePtr->data[0], shape, strides, deleter, {torch::kUInt8});
 
     if (preAllocatedOutputTensor.has_value()) {
       // We have already validated that preAllocatedOutputTensor and
@@ -182,11 +201,6 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
     } else {
       frameOutput.data = outputTensor;
     }
-  } else {
-    TORCH_CHECK(
-        false,
-        "Invalid color conversion library: ",
-        static_cast<int>(colorConversionLibrary));
   }
 }
 
@@ -208,25 +222,6 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwsScale(
   return resultHeight;
 }
 
-torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
-    const UniqueAVFrame& avFrame) {
-  UniqueAVFrame filteredAVFrame = filterGraphContext_->convert(avFrame);
-
-  TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24);
-
-  auto frameDims = getHeightAndWidthFromResizedAVFrame(*filteredAVFrame.get());
-  int height = frameDims.height;
-  int width = frameDims.width;
-  std::vector<int64_t> shape = {height, width, 3};
-  std::vector<int64_t> strides = {filteredAVFrame->linesize[0], 3, 1};
-  AVFrame* filteredAVFramePtr = filteredAVFrame.release();
-  auto deleter = [filteredAVFramePtr](void*) {
-    UniqueAVFrame avFrameToDelete(filteredAVFramePtr);
-  };
-  return torch::from_blob(
-      filteredAVFramePtr->data[0], shape, strides, deleter, {torch::kUInt8});
-}
-
 void CpuDeviceInterface::createSwsContext(
     const SwsFrameContext& swsFrameContext,
     const enum AVColorSpace colorspace) {
diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h
index d6004ca3b..f1982fb93 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.h
+++ b/src/torchcodec/_core/CpuDeviceInterface.h
@@ -26,6 +26,11 @@ class CpuDeviceInterface : public DeviceInterface {
   void initializeContext(
       [[maybe_unused]] AVCodecContext* codecContext) override {}
 
+  std::unique_ptr<FiltersContext> initializeFiltersContext(
+      const VideoStreamOptions& videoStreamOptions,
+      const UniqueAVFrame& avFrame,
+      const AVRational& timeBase) override;
+
   void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
       const AVRational& timeBase,
@@ -39,9 +44,6 @@ class CpuDeviceInterface : public DeviceInterface {
       const UniqueAVFrame& avFrame,
       torch::Tensor& outputTensor);
 
-  torch::Tensor convertAVFrameToTensorUsingFilterGraph(
-      const UniqueAVFrame& avFrame);
-
   struct SwsFrameContext {
     int inputWidth = 0;
     int inputHeight = 0;
@@ -64,15 +66,12 @@ class CpuDeviceInterface : public DeviceInterface {
       const SwsFrameContext& swsFrameContext,
      const enum AVColorSpace colorspace);
 
-  // color-conversion fields. Only one of FilterGraphContext and
-  // UniqueSwsContext should be non-null.
-  std::unique_ptr<FilterGraph> filterGraphContext_;
+  // SWS color conversion context.
   UniqueSwsContext swsContext_;
 
-  // Used to know whether a new FilterGraphContext or UniqueSwsContext should
+  // Used to know whether a new UniqueSwsContext should
   // be created before decoding a new frame.
   SwsFrameContext prevSwsFrameContext_;
-  FiltersContext prevFiltersContext_;
 };
 
 } // namespace facebook::torchcodec
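Note on the filtergraph branch in patch 2: the RGB24 AVFrame produced by the filter graph is handed to the output tensor without a copy, and the tensor's custom deleter frees the frame when the tensor dies. A reduced sketch of that pattern, assuming LibTorch and FFmpeg headers (wrapRgb24Frame is a hypothetical name; the patch uses UniqueAVFrame rather than av_frame_free):

    #include <torch/types.h>
    #include <vector>
    extern "C" {
    #include <libavutil/frame.h>
    }

    // The tensor borrows the frame's pixel buffer; the deleter releases the
    // AVFrame only once the tensor itself is destroyed.
    torch::Tensor wrapRgb24Frame(AVFrame* frame) {
      std::vector<int64_t> shape = {frame->height, frame->width, 3};
      // linesize[0] may include row padding, hence the explicit row stride.
      std::vector<int64_t> strides = {frame->linesize[0], 3, 1};
      auto deleter = [frame](void*) mutable { av_frame_free(&frame); };
      return torch::from_blob(
          frame->data[0], shape, strides, deleter, {torch::kUInt8});
    }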
From 8f899e16d8154512a2548b23ace49c0c3804a0bc Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Tue, 2 Sep 2025 18:45:17 +0000
Subject: [PATCH 3/3] Drop timeBase from convertAVFrameToFrameOutput API

Signed-off-by: Dmitry Rogozhkin
---
 src/torchcodec/_core/CpuDeviceInterface.cpp  | 1 -
 src/torchcodec/_core/CpuDeviceInterface.h    | 1 -
 src/torchcodec/_core/CudaDeviceInterface.cpp | 7 +------
 src/torchcodec/_core/CudaDeviceInterface.h   | 1 -
 src/torchcodec/_core/DeviceInterface.h       | 1 -
 src/torchcodec/_core/SingleStreamDecoder.cpp | 1 -
 6 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/torchcodec/_core/CpuDeviceInterface.cpp b/src/torchcodec/_core/CpuDeviceInterface.cpp
index 1eaa3be15..7aa0b48ce 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.cpp
+++ b/src/torchcodec/_core/CpuDeviceInterface.cpp
@@ -117,7 +117,6 @@ std::unique_ptr<FiltersContext> CpuDeviceInterface::initializeFiltersContext(
 // `dimension_order` parameter. It's up to callers to re-shape it if needed.
 void CpuDeviceInterface::convertAVFrameToFrameOutput(
     const VideoStreamOptions& videoStreamOptions,
-    [[maybe_unused]] const AVRational& timeBase,
     UniqueAVFrame& avFrame,
     FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h
index f1982fb93..1e86ded6b 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.h
+++ b/src/torchcodec/_core/CpuDeviceInterface.h
@@ -33,7 +33,6 @@ class CpuDeviceInterface : public DeviceInterface {
 
   void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor =
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index 375a9bc20..b34b2a6f3 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -263,7 +263,6 @@ std::unique_ptr<FiltersContext> CudaDeviceInterface::initializeFiltersContext(
 
 void CudaDeviceInterface::convertAVFrameToFrameOutput(
     const VideoStreamOptions& videoStreamOptions,
-    [[maybe_unused]] const AVRational& timeBase,
     UniqueAVFrame& avFrame,
     FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -281,11 +280,7 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
 
     FrameOutput cpuFrameOutput;
     cpuInterface->convertAVFrameToFrameOutput(
-        videoStreamOptions,
-        timeBase,
-        avFrame,
-        cpuFrameOutput,
-        preAllocatedOutputTensor);
+        videoStreamOptions, avFrame, cpuFrameOutput, preAllocatedOutputTensor);
 
     frameOutput.data = cpuFrameOutput.data.to(device_);
     return;
diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h
index b49908443..45419b9f4 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.h
+++ b/src/torchcodec/_core/CudaDeviceInterface.h
@@ -28,7 +28,6 @@ class CudaDeviceInterface : public DeviceInterface {
 
   void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor =
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
index 7916c81b2..90c05f049 100644
--- a/src/torchcodec/_core/DeviceInterface.h
+++ b/src/torchcodec/_core/DeviceInterface.h
@@ -48,7 +48,6 @@ class DeviceInterface {
 
   virtual void convertAVFrameToFrameOutput(
       const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 01d9f3cf9..58e41a686 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1289,7 +1289,6 @@ FrameOutput SingleStreamDecoder::convertAVFrameToFrameOutput(
   }
   deviceInterface_->convertAVFrameToFrameOutput(
       streamInfo.videoStreamOptions,
-      streamInfo.timeBase,
       avFrame,
       frameOutput,
       preAllocatedOutputTensor);
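Note on the filter-graph caching in patch 1: the decoder compares the pointed-to FiltersContext values (*filtersContext_ != *newFiltersContext) rather than the unique_ptrs themselves, because std::unique_ptr equality compares the managed addresses, and two separately allocated contexts with identical parameters would never compare equal, rebuilding the graph on every frame. A self-contained illustration with a hypothetical reduced Ctx type standing in for FiltersContext:

    #include <cassert>
    #include <memory>

    // Stand-in for FiltersContext with value equality.
    struct Ctx {
      int width;
      int height;
      bool operator==(const Ctx& other) const {
        return width == other.width && height == other.height;
      }
      bool operator!=(const Ctx& other) const {
        return !(*this == other);
      }
    };

    int main() {
      auto prev = std::make_unique<Ctx>(Ctx{1920, 1080});
      auto next = std::make_unique<Ctx>(Ctx{1920, 1080});
      assert(prev != next);   // unique_ptr comparison: different allocations.
      assert(*prev == *next); // value comparison: same parameters, reuse graph.
      return 0;
    }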