From 56a96fb2abb22feaeb0fb6261982a338f8395713 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 23 Sep 2025 13:47:34 -0400 Subject: [PATCH 01/28] check in testsrc mp4 and all_frames info --- .../testsrc2.mp4.stream0.all_frames_info.json | 1202 +++++++++++++++++ test/utils.py | 11 + 2 files changed, 1213 insertions(+) create mode 100644 test/resources/testsrc2.mp4.stream0.all_frames_info.json diff --git a/test/resources/testsrc2.mp4.stream0.all_frames_info.json b/test/resources/testsrc2.mp4.stream0.all_frames_info.json new file mode 100644 index 000000000..4272e30ad --- /dev/null +++ b/test/resources/testsrc2.mp4.stream0.all_frames_info.json @@ -0,0 +1,1202 @@ +[ + { + "pts_time": "0.000000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.016667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.033333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.050000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.066667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.083333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.100000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.116667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.133333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.150000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.166667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.183333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.200000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.216667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.233333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.250000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.266667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.283333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.300000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.316667", + "duration_time": "0.016667" + }, + { + "pts_time": 
"0.333333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.350000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.366667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.383333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.400000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.416667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.433333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.450000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.466667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.483333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.500000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.516667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.533333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.550000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.566667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.583333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.600000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.616667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.633333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.650000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.666667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.683333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.700000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.716667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.733333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.750000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.766667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.783333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.800000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.816667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.833333", + "duration_time": "0.016667" + }, + 
{ + "pts_time": "0.850000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.866667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.883333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.900000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.916667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.933333", + "duration_time": "0.016667" + }, + { + "pts_time": "0.950000", + "duration_time": "0.016667" + }, + { + "pts_time": "0.966667", + "duration_time": "0.016667" + }, + { + "pts_time": "0.983333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.000000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.016667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.033333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.050000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.066667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.083333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.100000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.116667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.133333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.150000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.166667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.183333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.200000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.216667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.233333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.250000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.266667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.283333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.300000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.316667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.333333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.350000", + "duration_time": 
"0.016667" + }, + { + "pts_time": "1.366667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.383333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.400000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.416667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.433333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.450000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.466667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.483333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.500000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.516667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.533333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.550000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.566667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.583333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.600000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.616667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.633333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.650000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.666667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.683333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.700000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.716667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.733333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.750000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.766667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.783333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.800000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.816667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.833333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.850000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.866667", + 
"duration_time": "0.016667" + }, + { + "pts_time": "1.883333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.900000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.916667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.933333", + "duration_time": "0.016667" + }, + { + "pts_time": "1.950000", + "duration_time": "0.016667" + }, + { + "pts_time": "1.966667", + "duration_time": "0.016667" + }, + { + "pts_time": "1.983333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.000000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.016667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.033333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.050000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.066667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.083333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.100000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.116667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.133333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.150000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.166667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.183333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.200000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.216667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.233333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.250000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.266667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.283333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.300000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.316667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.333333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.350000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.366667", + "duration_time": "0.016667" + }, + { + 
"pts_time": "2.383333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.400000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.416667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.433333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.450000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.466667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.483333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.500000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.516667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.533333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.550000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.566667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.583333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.600000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.616667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.633333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.650000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.666667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.683333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.700000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.716667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.733333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.750000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.766667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.783333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.800000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.816667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.833333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.850000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.866667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.883333", + "duration_time": 
"0.016667" + }, + { + "pts_time": "2.900000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.916667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.933333", + "duration_time": "0.016667" + }, + { + "pts_time": "2.950000", + "duration_time": "0.016667" + }, + { + "pts_time": "2.966667", + "duration_time": "0.016667" + }, + { + "pts_time": "2.983333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.000000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.016667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.033333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.050000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.066667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.083333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.100000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.116667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.133333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.150000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.166667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.183333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.200000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.216667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.233333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.250000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.266667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.283333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.300000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.316667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.333333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.350000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.366667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.383333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.400000", + 
"duration_time": "0.016667" + }, + { + "pts_time": "3.416667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.433333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.450000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.466667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.483333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.500000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.516667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.533333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.550000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.566667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.583333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.600000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.616667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.633333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.650000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.666667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.683333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.700000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.716667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.733333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.750000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.766667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.783333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.800000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.816667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.833333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.850000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.866667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.883333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.900000", + "duration_time": "0.016667" + }, + { + 
"pts_time": "3.916667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.933333", + "duration_time": "0.016667" + }, + { + "pts_time": "3.950000", + "duration_time": "0.016667" + }, + { + "pts_time": "3.966667", + "duration_time": "0.016667" + }, + { + "pts_time": "3.983333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.000000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.016667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.033333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.050000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.066667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.083333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.100000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.116667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.133333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.150000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.166667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.183333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.200000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.216667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.233333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.250000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.266667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.283333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.300000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.316667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.333333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.350000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.366667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.383333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.400000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.416667", + "duration_time": 
"0.016667" + }, + { + "pts_time": "4.433333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.450000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.466667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.483333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.500000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.516667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.533333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.550000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.566667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.583333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.600000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.616667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.633333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.650000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.666667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.683333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.700000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.716667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.733333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.750000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.766667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.783333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.800000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.816667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.833333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.850000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.866667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.883333", + "duration_time": "0.016667" + }, + { + "pts_time": "4.900000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.916667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.933333", + 
"duration_time": "0.016667" + }, + { + "pts_time": "4.950000", + "duration_time": "0.016667" + }, + { + "pts_time": "4.966667", + "duration_time": "0.016667" + }, + { + "pts_time": "4.983333", + "duration_time": "0.016667" + } +] diff --git a/test/utils.py b/test/utils.py index 7c91f307c..20522ff1e 100644 --- a/test/utils.py +++ b/test/utils.py @@ -411,6 +411,17 @@ def get_empty_chw_tensor(self, *, stream_index: int) -> torch.Tensor: frames={}, # Automatically loaded from json file ) +# Video generated with: +# ffmpeg -y -f lavfi -i testsrc2 -t 5 -c:v h264 -r 60 -g 600 -pix_fmt yuv420p testsrc2.mp4 +TESTSRC2_VIDEO = TestVideo( + filename="testsrc2.mp4", + default_stream_index=0, + stream_infos={ + 0: TestVideoStreamInfo(width=320, height=240, num_color_channels=3), + }, + frames={}, # Automatically loaded from json file +) + # Video generated with: # ffmpeg -f lavfi -i testsrc2=duration=1:size=200x200:rate=30 -c:v libx265 -pix_fmt yuv420p10le -preset fast -crf 23 h265_10bits.mp4 H265_10BITS = TestVideo( From f09fe35660cd6e765e35313c7ef92b8ec5e465f2 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 23 Sep 2025 13:48:44 -0400 Subject: [PATCH 02/28] Use testsrc in round trip test --- test/test_ops.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index c3233a4f9..f0ebe2fbd 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -56,6 +56,7 @@ SINE_MONO_S32_44100, SINE_MONO_S32_8000, unsplit_device_str, + TESTSRC2_VIDEO, ) torch._dynamo.config.capture_dynamic_output_shape_ops = True @@ -1382,10 +1383,10 @@ def decode(self, file_path) -> torch.Tensor: return frames @pytest.mark.parametrize("format", ("mov", "mp4", "avi")) - # TODO-VideoEncoder: enable additional formats (mkv, webm) + # TODO-VideoEncoder: enable additional formats ("mkv", "webm", "flv") + # via user selected video codecs def test_video_encoder_test_round_trip(self, tmp_path, format): - # TODO-VideoEncoder: Test with FFmpeg's testsrc2 
video - asset = NASA_VIDEO + asset = TESTSRC2_VIDEO # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data @@ -1399,6 +1400,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): for s_frame, rt_frame in zip(source_frames, round_trip_frames): res = psnr(s_frame, rt_frame) assert res > 30 + torch.testing.assert_close(s_frame, rt_frame, atol=2, rtol=0) if __name__ == "__main__": From 5c94fdac86dd0268d448d806a6ed66dad5c081ea Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 23 Sep 2025 13:49:41 -0400 Subject: [PATCH 03/28] add func to access pix_fmts in FFmpeg7+ --- src/torchcodec/_core/FFMPEGCommon.cpp | 20 ++++++++++++++++++++ src/torchcodec/_core/FFMPEGCommon.h | 1 + 2 files changed, 21 insertions(+) diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp index 172bfeb76..f6247208c 100644 --- a/src/torchcodec/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/_core/FFMPEGCommon.cpp @@ -90,6 +90,26 @@ const int* getSupportedSampleRates(const AVCodec& avCodec) { return supportedSampleRates; } +const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec) { + const AVPixelFormat* supportedPixelFormats = nullptr; +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) + int numPixelFormats = 0; + int ret = avcodec_get_supported_config( + nullptr, + &avCodec, + AV_CODEC_CONFIG_PIX_FORMAT, + 0, + reinterpret_cast(&supportedPixelFormats), + &numPixelFormats); + if (ret < 0 || supportedPixelFormats == nullptr) { + TORCH_CHECK(false, "Couldn't get supported pixel formats from encoder."); + } +#else + supportedPixelFormats = avCodec.pix_fmts; +#endif + return supportedPixelFormats; +} + const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec) { const AVSampleFormat* supportedSampleFormats = nullptr; #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1 diff --git a/src/torchcodec/_core/FFMPEGCommon.h 
b/src/torchcodec/_core/FFMPEGCommon.h index 92a262d26..19cddcc37 100644 --- a/src/torchcodec/_core/FFMPEGCommon.h +++ b/src/torchcodec/_core/FFMPEGCommon.h @@ -168,6 +168,7 @@ void setDuration(const UniqueAVFrame& frame, int64_t duration); const int* getSupportedSampleRates(const AVCodec& avCodec); const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec); +const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec); int getNumChannels(const UniqueAVFrame& avFrame); int getNumChannels(const UniqueAVCodecContext& avCodecContext); From f43872902124eee9443b4903f96bc459a735d5a0 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 26 Sep 2025 15:11:57 -0400 Subject: [PATCH 04/28] fix 6 container fmts --- src/torchcodec/_core/Encoder.cpp | 86 +++++++++++++++++++++----------- src/torchcodec/_core/Encoder.h | 1 + test/test_ops.py | 5 +- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 9a1f4ee87..1e52dbba5 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -1,4 +1,7 @@ #include +extern "C" { +#include +} #include "src/torchcodec/_core/AVIOTensorContext.h" #include "src/torchcodec/_core/Encoder.h" @@ -579,23 +582,28 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { + av_log_set_level(AV_LOG_DEBUG); + + // Always try default + // This works for flv (format accepts libx264, but errors) + // but fails for avi (should use libx264, but defaults to mpeg4) const AVCodec* avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); + // Try libx264 first, then fallback to default ffmpeg + // const AVCodec* avCodec = avcodec_find_encoder(AV_CODEC_ID_H264); + // if (avCodec == nullptr || avformat_query_codec(avFormatContext_->oformat, + // avCodec->id, 0) == 0) { + // std::cout << "for " << avFormatContext_->oformat + // << ", 264 was unavailable or 
unsupported! " << std::endl; + // avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); + // } TORCH_CHECK(avCodec != nullptr, "Video codec not found"); + std::cout << "Using codec: " << avCodec->name << std::endl; AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec); TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); avCodecContext_.reset(avCodecContext); - // Set encoding options - // TODO-VideoEncoder: Allow bitrate to be set - std::optional desiredBitRate = videoStreamOptions.bitRate; - if (desiredBitRate.has_value()) { - TORCH_CHECK( - *desiredBitRate >= 0, "bit_rate=", *desiredBitRate, " must be >= 0."); - } - avCodecContext_->bit_rate = desiredBitRate.value_or(0); - // Store dimension order and input pixel format // TODO-VideoEncoder: Remove assumption that tensor in NCHW format auto sizes = frames_.sizes(); @@ -608,9 +616,25 @@ void VideoEncoder::initializeEncoder( outWidth_ = inWidth_; outHeight_ = inHeight_; - // Use YUV420P as default output format + // Use YUV444P as default output format for lossless encoding // TODO-VideoEncoder: Enable other pixel formats - outPixelFormat_ = AV_PIX_FMT_YUV420P; + // outPixelFormat_ = AV_PIX_FMT_YUV444P; + // outPixelFormat_ = AV_PIX_FMT_YUV420P; + + // use first? 
+ // outPixelFormat_ = getSupportedPixelFormats(*avCodec)[0]; + + // Let FFmpeg choose best pixel format to minimize loss + int loss = 0; + outPixelFormat_ = avcodec_find_best_pix_fmt_of_list( + getSupportedPixelFormats(*avCodec), // List of codec-supported formats + AV_PIX_FMT_GBRP, // We reorder input to GBRP currently + 0, // No alpha channel + &loss // Information about conversion losses + ); + TORCH_CHECK(outPixelFormat_ != -1, "Failed to find best pix fmt") + std::cout << "Using pixel format: " << av_get_pix_fmt_name(outPixelFormat_) + << std::endl; // Configure codec parameters avCodecContext_->codec_id = avCodec->id; @@ -621,44 +645,45 @@ void VideoEncoder::initializeEncoder( avCodecContext_->time_base = {1, inFrameRate_}; avCodecContext_->framerate = {inFrameRate_, 1}; - // TODO-VideoEncoder: Allow GOP size and max B-frames to be set - if (videoStreamOptions.gopSize.has_value()) { - avCodecContext_->gop_size = *videoStreamOptions.gopSize; - } else { - avCodecContext_->gop_size = 12; // Default GOP size - } - - if (videoStreamOptions.maxBFrames.has_value()) { - avCodecContext_->max_b_frames = *videoStreamOptions.maxBFrames; - } else { - avCodecContext_->max_b_frames = 0; // No max B-frames to reduce compression + // Set global header flag for containers that need it (like Matroska) + // This populates extradata to enable mkv encoding + // https://stackoverflow.com/questions/60278773/invalid-data-when-creating-mkv-container-with-h264-stream-because-extradata-is-n + if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) { + avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } + // AVDictionary* options = nullptr; + // av_dict_set(&options, "preset", "veryslow", 0); // Highest quality encoding + // av_dict_set(&options, "crf", "0", 0); // Needed to produce lossless videos + // int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); + // av_dict_free(&options); int status = avcodec_open2(avCodecContext_.get(), avCodec, nullptr); 
TORCH_CHECK( status == AVSUCCESS, "avcodec_open2 failed: ", getFFMPEGErrorStringFromErrorCode(status)); - AVStream* avStream = avformat_new_stream(avFormatContext_.get(), nullptr); - TORCH_CHECK(avStream != nullptr, "Couldn't create new stream."); + avStream_ = avformat_new_stream(avFormatContext_.get(), nullptr); + TORCH_CHECK(avStream_ != nullptr, "Couldn't create new stream."); // Set the stream time base to encode correct frame timestamps - avStream->time_base = avCodecContext_->time_base; + avStream_->time_base = avCodecContext_->time_base; status = avcodec_parameters_from_context( - avStream->codecpar, avCodecContext_.get()); + avStream_->codecpar, avCodecContext_.get()); TORCH_CHECK( status == AVSUCCESS, "avcodec_parameters_from_context failed: ", getFFMPEGErrorStringFromErrorCode(status)); - streamIndex_ = avStream->index; + streamIndex_ = avStream_->index; } void VideoEncoder::encode() { + av_log_set_level(AV_LOG_DEBUG); // To be on the safe side we enforce that encode() can only be called once TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice."); encodeWasCalled_ = true; + av_dump_format(avFormatContext_.get(), 0, avFormatContext_->url, 1); int status = avformat_write_header(avFormatContext_.get(), nullptr); TORCH_CHECK( status == AVSUCCESS, @@ -757,7 +782,7 @@ void VideoEncoder::encodeFrame( "Error while sending frame: ", getFFMPEGErrorStringFromErrorCode(status)); - while (true) { + while (status >= 0) { ReferenceAVPacket packet(autoAVPacket); status = avcodec_receive_packet(avCodecContext_.get(), packet.get()); if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) { @@ -776,6 +801,11 @@ void VideoEncoder::encodeFrame( "Error receiving packet: ", getFFMPEGErrorStringFromErrorCode(status)); + if (packet->duration == 0) { + packet->duration = 1; + } + av_packet_rescale_ts( + packet.get(), avCodecContext_->time_base, avStream_->time_base); packet->stream_index = streamIndex_; status = av_interleaved_write_frame(avFormatContext_.get(), 
packet.get()); diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index 81d8d1975..14d5fca2e 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -154,6 +154,7 @@ class VideoEncoder { UniqueEncodingAVFormatContext avFormatContext_; UniqueAVCodecContext avCodecContext_; int streamIndex_ = -1; + AVStream* avStream_; UniqueSwsContext swsContext_; const torch::Tensor frames_; diff --git a/test/test_ops.py b/test/test_ops.py index f0ebe2fbd..60437e88f 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1382,9 +1382,8 @@ def decode(self, file_path) -> torch.Tensor: frames, *_ = get_frames_in_range(decoder, start=0, stop=60) return frames - @pytest.mark.parametrize("format", ("mov", "mp4", "avi")) + @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) # TODO-VideoEncoder: enable additional formats ("mkv", "webm", "flv") - # via user selected video codecs def test_video_encoder_test_round_trip(self, tmp_path, format): asset = TESTSRC2_VIDEO @@ -1400,7 +1399,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): for s_frame, rt_frame in zip(source_frames, round_trip_frames): res = psnr(s_frame, rt_frame) assert res > 30 - torch.testing.assert_close(s_frame, rt_frame, atol=2, rtol=0) + torch.testing.assert_close(s_frame, rt_frame, atol=0, rtol=0) if __name__ == "__main__": From 1162beb967f36c038b69a8d4b9db6c151c09cd55 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 26 Sep 2025 15:35:42 -0400 Subject: [PATCH 05/28] pass crf variable in custom ops --- src/torchcodec/_core/Encoder.cpp | 18 ++++++++++++------ src/torchcodec/_core/StreamOptions.h | 6 +++--- src/torchcodec/_core/custom_ops.cpp | 6 ++++-- src/torchcodec/_core/ops.py | 1 + test/test_ops.py | 2 +- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 1e52dbba5..b6e0acf61 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ 
b/src/torchcodec/_core/Encoder.cpp @@ -652,12 +652,18 @@ void VideoEncoder::initializeEncoder( avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - // AVDictionary* options = nullptr; - // av_dict_set(&options, "preset", "veryslow", 0); // Highest quality encoding - // av_dict_set(&options, "crf", "0", 0); // Needed to produce lossless videos - // int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); - // av_dict_free(&options); - int status = avcodec_open2(avCodecContext_.get(), avCodec, nullptr); + // accept optional args + AVDictionary* options = nullptr; + if (videoStreamOptions.crf.has_value()) { + av_dict_set( + &options, + "crf", + "0", + videoStreamOptions.crf.value()); // Needed to produce lossless videos + } + int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); + av_dict_free(&options); + TORCH_CHECK( status == AVSUCCESS, "avcodec_open2 failed: ", diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index 9b02cceca..7728a676e 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -45,9 +45,9 @@ struct VideoStreamOptions { std::string_view deviceVariant = "default"; // Encoding options - std::optional bitRate; - std::optional gopSize; - std::optional maxBFrames; + // TODO-VideoEncoder: Consider adding other optional fields here + // (bit rate, gop size, max b frames, preset) + std::optional crf; }; struct AudioStreamOptions { diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 57753ad42..e23417bc0 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -33,7 +33,7 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "encode_audio_to_file(Tensor samples, int sample_rate, str filename, int? bit_rate=None, int? num_channels=None, int? 
desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, int crf) -> ()"); m.def( "encode_audio_to_tensor(Tensor samples, int sample_rate, str format, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> Tensor"); m.def( @@ -501,8 +501,10 @@ OpsAudioFramesOutput get_frames_by_pts_in_range_audio( void encode_video_to_file( const at::Tensor& frames, int64_t frame_rate, - std::string_view file_name) { + std::string_view file_name, + int64_t crf) { VideoStreamOptions videoStreamOptions; + videoStreamOptions.crf = crf; VideoEncoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index bfb036c76..bb2900d91 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -259,6 +259,7 @@ def encode_video_to_file_abstract( frames: torch.Tensor, frame_rate: int, filename: str, + crf: int, ) -> None: return diff --git a/test/test_ops.py b/test/test_ops.py index 60437e88f..be9371265 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1392,7 +1392,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded - encode_video_to_file(source_frames, frame_rate, encoded_path) + encode_video_to_file(source_frames, frame_rate, encoded_path, crf=0) round_trip_frames = self.decode(encoded_path).data # Check that PSNR for decode(encode(samples)) is above 30 From ea85cfeb8c614cda439826a98c6f142acdac59b0 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 26 Sep 2025 18:11:28 -0400 Subject: [PATCH 06/28] Add tolerances for various cases --- test/test_ops.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index be9371265..df40fe4c3 100644 
--- a/test/test_ops.py +++ b/test/test_ops.py @@ -9,7 +9,7 @@ import os from functools import partial -from .utils import in_fbcode +from .utils import get_ffmpeg_major_version, in_fbcode os.environ["TORCH_LOGS"] = "output_code" import json @@ -1383,9 +1383,18 @@ def decode(self, file_path) -> torch.Tensor: return frames @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) - # TODO-VideoEncoder: enable additional formats ("mkv", "webm", "flv") def test_video_encoder_test_round_trip(self, tmp_path, format): - asset = TESTSRC2_VIDEO + + ffmpeg_version = get_ffmpeg_major_version() + if ffmpeg_version == 4 and format == "webm": + pytest.skip("Codec for webm is not available in the FFmpeg4 installation.") + # The output pixel format depends on the codecs available, and FFmpeg version. + # In the cases where YUV420P is chosen and chroma subsampling happens, we need higher tolerance. + if ffmpeg_version == 6 or format in ("avi", "flv"): + atol = 55 + else: + atol = 2 + asset = NASA_VIDEO # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data @@ -1399,7 +1408,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): for s_frame, rt_frame in zip(source_frames, round_trip_frames): res = psnr(s_frame, rt_frame) assert res > 30 - torch.testing.assert_close(s_frame, rt_frame, atol=0, rtol=0) + torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0) if __name__ == "__main__": From f0fffca88be1b931f45c83eafda0a7e8326e42ed Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 26 Sep 2025 19:43:15 -0400 Subject: [PATCH 07/28] clean up logging, comments --- src/torchcodec/_core/Encoder.cpp | 46 +++++++------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index b6e0acf61..a4555ddd0 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -1,7 +1,4 @@ 
#include -extern "C" { -#include -} #include "src/torchcodec/_core/AVIOTensorContext.h" #include "src/torchcodec/_core/Encoder.h" @@ -582,23 +579,9 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { - av_log_set_level(AV_LOG_DEBUG); - - // Always try default - // This works for flv (format accepts libx264, but errors) - // but fails for avi (should use libx264, but defaults to mpeg4) const AVCodec* avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); - // Try libx264 first, then fallback to default ffmpeg - // const AVCodec* avCodec = avcodec_find_encoder(AV_CODEC_ID_H264); - // if (avCodec == nullptr || avformat_query_codec(avFormatContext_->oformat, - // avCodec->id, 0) == 0) { - // std::cout << "for " << avFormatContext_->oformat - // << ", 264 was unavailable or unsupported! " << std::endl; - // avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); - // } TORCH_CHECK(avCodec != nullptr, "Video codec not found"); - std::cout << "Using codec: " << avCodec->name << std::endl; AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec); TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); @@ -616,25 +599,15 @@ void VideoEncoder::initializeEncoder( outWidth_ = inWidth_; outHeight_ = inHeight_; - // Use YUV444P as default output format for lossless encoding // TODO-VideoEncoder: Enable other pixel formats - // outPixelFormat_ = AV_PIX_FMT_YUV444P; - // outPixelFormat_ = AV_PIX_FMT_YUV420P; - - // use first? 
- // outPixelFormat_ = getSupportedPixelFormats(*avCodec)[0]; - // Let FFmpeg choose best pixel format to minimize loss - int loss = 0; outPixelFormat_ = avcodec_find_best_pix_fmt_of_list( - getSupportedPixelFormats(*avCodec), // List of codec-supported formats + getSupportedPixelFormats(*avCodec), // List of supported formats AV_PIX_FMT_GBRP, // We reorder input to GBRP currently 0, // No alpha channel - &loss // Information about conversion losses + 0 // Discard conversion loss information ); TORCH_CHECK(outPixelFormat_ != -1, "Failed to find best pix fmt") - std::cout << "Using pixel format: " << av_get_pix_fmt_name(outPixelFormat_) - << std::endl; // Configure codec parameters avCodecContext_->codec_id = avCodec->id; @@ -645,21 +618,19 @@ void VideoEncoder::initializeEncoder( avCodecContext_->time_base = {1, inFrameRate_}; avCodecContext_->framerate = {inFrameRate_, 1}; - // Set global header flag for containers that need it (like Matroska) - // This populates extradata to enable mkv encoding - // https://stackoverflow.com/questions/60278773/invalid-data-when-creating-mkv-container-with-h264-stream-because-extradata-is-n + // Set flag for containers that require extradata to be in the codec context if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) { avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - // accept optional args + // Apply videoStreamOptions AVDictionary* options = nullptr; if (videoStreamOptions.crf.has_value()) { av_dict_set( &options, "crf", - "0", - videoStreamOptions.crf.value()); // Needed to produce lossless videos + std::to_string(videoStreamOptions.crf.value()).c_str(), + 0); } int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); av_dict_free(&options); @@ -684,12 +655,10 @@ void VideoEncoder::initializeEncoder( } void VideoEncoder::encode() { - av_log_set_level(AV_LOG_DEBUG); // To be on the safe side we enforce that encode() can only be called once TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() 
twice."); encodeWasCalled_ = true; - av_dump_format(avFormatContext_.get(), 0, avFormatContext_->url, 1); int status = avformat_write_header(avFormatContext_.get(), nullptr); TORCH_CHECK( status == AVSUCCESS, @@ -810,6 +779,9 @@ void VideoEncoder::encodeFrame( if (packet->duration == 0) { packet->duration = 1; } + // av_packet_rescale_ts ensures encoded frames have correct timestamps. + // This prevents "no more frames" errors when decoding encoded frames, + // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46 av_packet_rescale_ts( packet.get(), avCodecContext_->time_base, avStream_->time_base); packet->stream_index = streamIndex_; From 444254ecf9af86b9a929aa3372059ba8f16a0688 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 29 Sep 2025 16:36:14 -0400 Subject: [PATCH 08/28] delete unused resource --- .../testsrc2.mp4.stream0.all_frames_info.json | 1202 ----------------- test/utils.py | 10 - 2 files changed, 1212 deletions(-) delete mode 100644 test/resources/testsrc2.mp4.stream0.all_frames_info.json diff --git a/test/resources/testsrc2.mp4.stream0.all_frames_info.json b/test/resources/testsrc2.mp4.stream0.all_frames_info.json deleted file mode 100644 index 4272e30ad..000000000 --- a/test/resources/testsrc2.mp4.stream0.all_frames_info.json +++ /dev/null @@ -1,1202 +0,0 @@ -[ - { - "pts_time": "0.000000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.016667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.033333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.050000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.066667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.083333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.100000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.116667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.133333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.150000", - 
"duration_time": "0.016667" - }, - { - "pts_time": "0.166667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.183333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.200000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.216667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.233333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.250000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.266667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.283333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.300000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.316667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.333333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.350000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.366667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.383333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.400000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.416667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.433333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.450000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.466667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.483333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.500000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.516667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.533333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.550000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.566667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.583333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.600000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.616667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.633333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.650000", - "duration_time": "0.016667" - }, - { - 
"pts_time": "0.666667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.683333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.700000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.716667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.733333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.750000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.766667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.783333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.800000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.816667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.833333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.850000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.866667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.883333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.900000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.916667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.933333", - "duration_time": "0.016667" - }, - { - "pts_time": "0.950000", - "duration_time": "0.016667" - }, - { - "pts_time": "0.966667", - "duration_time": "0.016667" - }, - { - "pts_time": "0.983333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.000000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.016667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.033333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.050000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.066667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.083333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.100000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.116667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.133333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.150000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.166667", - "duration_time": 
"0.016667" - }, - { - "pts_time": "1.183333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.200000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.216667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.233333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.250000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.266667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.283333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.300000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.316667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.333333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.350000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.366667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.383333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.400000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.416667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.433333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.450000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.466667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.483333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.500000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.516667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.533333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.550000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.566667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.583333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.600000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.616667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.633333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.650000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.666667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.683333", - 
"duration_time": "0.016667" - }, - { - "pts_time": "1.700000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.716667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.733333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.750000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.766667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.783333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.800000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.816667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.833333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.850000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.866667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.883333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.900000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.916667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.933333", - "duration_time": "0.016667" - }, - { - "pts_time": "1.950000", - "duration_time": "0.016667" - }, - { - "pts_time": "1.966667", - "duration_time": "0.016667" - }, - { - "pts_time": "1.983333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.000000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.016667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.033333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.050000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.066667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.083333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.100000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.116667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.133333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.150000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.166667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.183333", - "duration_time": "0.016667" - }, - { - 
"pts_time": "2.200000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.216667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.233333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.250000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.266667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.283333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.300000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.316667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.333333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.350000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.366667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.383333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.400000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.416667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.433333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.450000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.466667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.483333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.500000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.516667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.533333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.550000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.566667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.583333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.600000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.616667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.633333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.650000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.666667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.683333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.700000", - "duration_time": 
"0.016667" - }, - { - "pts_time": "2.716667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.733333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.750000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.766667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.783333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.800000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.816667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.833333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.850000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.866667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.883333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.900000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.916667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.933333", - "duration_time": "0.016667" - }, - { - "pts_time": "2.950000", - "duration_time": "0.016667" - }, - { - "pts_time": "2.966667", - "duration_time": "0.016667" - }, - { - "pts_time": "2.983333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.000000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.016667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.033333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.050000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.066667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.083333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.100000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.116667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.133333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.150000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.166667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.183333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.200000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.216667", - 
"duration_time": "0.016667" - }, - { - "pts_time": "3.233333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.250000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.266667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.283333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.300000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.316667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.333333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.350000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.366667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.383333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.400000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.416667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.433333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.450000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.466667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.483333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.500000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.516667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.533333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.550000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.566667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.583333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.600000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.616667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.633333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.650000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.666667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.683333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.700000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.716667", - "duration_time": "0.016667" - }, - { - 
"pts_time": "3.733333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.750000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.766667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.783333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.800000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.816667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.833333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.850000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.866667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.883333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.900000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.916667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.933333", - "duration_time": "0.016667" - }, - { - "pts_time": "3.950000", - "duration_time": "0.016667" - }, - { - "pts_time": "3.966667", - "duration_time": "0.016667" - }, - { - "pts_time": "3.983333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.000000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.016667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.033333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.050000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.066667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.083333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.100000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.116667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.133333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.150000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.166667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.183333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.200000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.216667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.233333", - "duration_time": 
"0.016667" - }, - { - "pts_time": "4.250000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.266667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.283333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.300000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.316667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.333333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.350000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.366667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.383333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.400000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.416667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.433333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.450000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.466667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.483333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.500000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.516667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.533333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.550000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.566667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.583333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.600000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.616667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.633333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.650000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.666667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.683333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.700000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.716667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.733333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.750000", - 
"duration_time": "0.016667" - }, - { - "pts_time": "4.766667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.783333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.800000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.816667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.833333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.850000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.866667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.883333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.900000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.916667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.933333", - "duration_time": "0.016667" - }, - { - "pts_time": "4.950000", - "duration_time": "0.016667" - }, - { - "pts_time": "4.966667", - "duration_time": "0.016667" - }, - { - "pts_time": "4.983333", - "duration_time": "0.016667" - } -] diff --git a/test/utils.py b/test/utils.py index 20522ff1e..174bbd1b7 100644 --- a/test/utils.py +++ b/test/utils.py @@ -411,16 +411,6 @@ def get_empty_chw_tensor(self, *, stream_index: int) -> torch.Tensor: frames={}, # Automatically loaded from json file ) -# Video generated with: -# ffmpeg -y -f lavfi -i testsrc2 -t 5 -c:v h264 -r 60 -g 600 -pix_fmt yuv420p testsrc2.mp4 -TESTSRC2_VIDEO = TestVideo( - filename="testsrc2.mp4", - default_stream_index=0, - stream_infos={ - 0: TestVideoStreamInfo(width=320, height=240, num_color_channels=3), - }, - frames={}, # Automatically loaded from json file -) # Video generated with: # ffmpeg -f lavfi -i testsrc2=duration=1:size=200x200:rate=30 -c:v libx265 -pix_fmt yuv420p10le -preset fast -crf 23 h265_10bits.mp4 From 4bac987fab87784a44ca8a76ebe3e5d7942f3246 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 29 Sep 2025 17:51:53 -0400 Subject: [PATCH 09/28] remove whitespace --- test/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/utils.py b/test/utils.py index 
174bbd1b7..7c91f307c 100644 --- a/test/utils.py +++ b/test/utils.py @@ -411,7 +411,6 @@ def get_empty_chw_tensor(self, *, stream_index: int) -> torch.Tensor: frames={}, # Automatically loaded from json file ) - # Video generated with: # ffmpeg -f lavfi -i testsrc2=duration=1:size=200x200:rate=30 -c:v libx265 -pix_fmt yuv420p10le -preset fast -crf 23 h265_10bits.mp4 H265_10BITS = TestVideo( From 75c5b3667639e62f40a1326027f400ff51c988ca Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 30 Sep 2025 00:26:55 -0400 Subject: [PATCH 10/28] adjust test order --- test/test_ops.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index df40fe4c3..3256a7955 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1384,18 +1384,10 @@ def decode(self, file_path) -> torch.Tensor: @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) def test_video_encoder_test_round_trip(self, tmp_path, format): - ffmpeg_version = get_ffmpeg_major_version() if ffmpeg_version == 4 and format == "webm": pytest.skip("Codec for webm is not available in the FFmpeg4 installation.") - # The output pixel format depends on the codecs available, and FFmpeg version. - # In the cases where YUV420P is chosen and chroma subsampling happens, we need higher tolerance. - if ffmpeg_version == 6 or format in ("avi", "flv"): - atol = 55 - else: - atol = 2 asset = NASA_VIDEO - # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data @@ -1404,6 +1396,13 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): encode_video_to_file(source_frames, frame_rate, encoded_path, crf=0) round_trip_frames = self.decode(encoded_path).data + # The output pixel format depends on the codecs available, and FFmpeg version. + # In the cases where YUV420P is chosen and chroma subsampling happens, assert_close needs higher tolerance. 
+ if ffmpeg_version == 6 or format in ("avi", "flv"): + atol = 55 + else: + atol = 2 + # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video # Check that PSNR for decode(encode(samples)) is above 30 for s_frame, rt_frame in zip(source_frames, round_trip_frames): res = psnr(s_frame, rt_frame) From 796499ee5a2b8296e38077ac90ba04e1d3fecc39 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 30 Sep 2025 10:59:34 -0400 Subject: [PATCH 11/28] remove positional arg --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index 3256a7955..41420e913 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1393,7 +1393,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded - encode_video_to_file(source_frames, frame_rate, encoded_path, crf=0) + encode_video_to_file(source_frames, frame_rate, encoded_path, 0) round_trip_frames = self.decode(encoded_path).data # The output pixel format depends on the codecs available, and FFmpeg version. From 2cafa103da3796c93d722f54f0b43ec1beba98d5 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Tue, 30 Sep 2025 15:37:42 -0400 Subject: [PATCH 12/28] Make crf optional --- src/torchcodec/_core/custom_ops.cpp | 4 ++-- src/torchcodec/_core/ops.py | 2 +- test/test_ops.py | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index e23417bc0..5ba98e2c1 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -33,7 +33,7 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "encode_audio_to_file(Tensor samples, int sample_rate, str filename, int? bit_rate=None, int? num_channels=None, int? 
desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, int crf) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, int? crf=None) -> ()"); m.def( "encode_audio_to_tensor(Tensor samples, int sample_rate, str format, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> Tensor"); m.def( @@ -502,7 +502,7 @@ void encode_video_to_file( const at::Tensor& frames, int64_t frame_rate, std::string_view file_name, - int64_t crf) { + std::optional<int64_t> crf = std::nullopt) { VideoStreamOptions videoStreamOptions; videoStreamOptions.crf = crf; VideoEncoder( diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index bb2900d91..08ee4fa28 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -259,7 +259,7 @@ def encode_video_to_file_abstract( frames: torch.Tensor, frame_rate: int, filename: str, - crf: int, + crf: Optional[int], ) -> None: return diff --git a/test/test_ops.py b/test/test_ops.py index 41420e913..88e4b9830 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1393,11 +1393,12 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded - encode_video_to_file(source_frames, frame_rate, encoded_path, 0) + encode_video_to_file(source_frames, frame_rate, encoded_path, crf=0) round_trip_frames = self.decode(encoded_path).data - # The output pixel format depends on the codecs available, and FFmpeg version. - # In the cases where YUV420P is chosen and chroma subsampling happens, assert_close needs higher tolerance. + # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed. + # Converting to the output format may perform chroma subsampling. + # Other times, no conversion between YUV and RGB is required.
if ffmpeg_version == 6 or format in ("avi", "flv"): atol = 55 else: From 1aebfec4a976b1c91b9595b3f1da02fb190bf952 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 1 Oct 2025 10:18:18 -0400 Subject: [PATCH 13/28] crf default --- src/torchcodec/_core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 08ee4fa28..44dc89e2b 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -259,7 +259,7 @@ def encode_video_to_file_abstract( frames: torch.Tensor, frame_rate: int, filename: str, - crf: Optional[int], + crf: Optional[int] = None, ) -> None: return From 49d85d6645ce3267df252e087bdf5dc3c41a31cf Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 1 Oct 2025 12:34:21 -0400 Subject: [PATCH 14/28] name args? --- test/test_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index 88e4b9830..78393bef9 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1393,7 +1393,9 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded - encode_video_to_file(source_frames, frame_rate, encoded_path, crf=0) + encode_video_to_file( + frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 + ) round_trip_frames = self.decode(encoded_path).data # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed. 
From 4ab1b6308351b0155cf15dbfc8b8bf799e78fb14 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 1 Oct 2025 15:14:38 -0400 Subject: [PATCH 15/28] windows + webm test skips --- test/test_ops.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 78393bef9..a4ec6a6be 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -9,7 +9,7 @@ import os from functools import partial -from .utils import get_ffmpeg_major_version, in_fbcode +from .utils import get_ffmpeg_major_version, in_fbcode, IS_WINDOWS os.environ["TORCH_LOGS"] = "output_code" import json @@ -1385,8 +1385,15 @@ def decode(self, file_path) -> torch.Tensor: @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) def test_video_encoder_test_round_trip(self, tmp_path, format): ffmpeg_version = get_ffmpeg_major_version() - if ffmpeg_version == 4 and format == "webm": - pytest.skip("Codec for webm is not available in the FFmpeg4 installation.") + if format == "webm": + if ffmpeg_version == 4: + pytest.skip( + "Codec for webm is not available in the FFmpeg4 installation." + ) + if IS_WINDOWS and ffmpeg_version in (6, 7): + pytest.skip( + "Codec for webm is not available in the FFmpeg6/7 installation on Windows." 
+ ) asset = NASA_VIDEO # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data From 5f2928f498f2b564ab8adcd5a4e9f3847c955167 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Sat, 4 Oct 2025 02:10:00 -0400 Subject: [PATCH 16/28] compare against cli, high % match --- src/torchcodec/_core/Encoder.cpp | 19 +++++- test/test_ops.py | 99 ++++++++++++++++++++++++++++---- 2 files changed, 106 insertions(+), 12 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index a4555ddd0..80682b76f 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -4,6 +4,10 @@ #include "src/torchcodec/_core/Encoder.h" #include "torch/types.h" +extern "C" { +#include +} + namespace facebook::torchcodec { namespace { @@ -579,6 +583,7 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { + av_log_set_level(AV_LOG_VERBOSE); const AVCodec* avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); TORCH_CHECK(avCodec != nullptr, "Video codec not found"); @@ -625,12 +630,22 @@ void VideoEncoder::initializeEncoder( // Apply videoStreamOptions AVDictionary* options = nullptr; - if (videoStreamOptions.crf.has_value()) { + if (videoStreamOptions.crf.has_value() && + (avCodec->id != AV_CODEC_ID_MPEG4 && avCodec->id != AV_CODEC_ID_FLV1)) { av_dict_set( &options, "crf", std::to_string(videoStreamOptions.crf.value()).c_str(), 0); + } else { + // For codecs that don't support CRF (mpeg4, flv1), + // use quality-based encoding via global_quality + qscale flag + avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE; + // While qscale is similar to crf, it is likely not interchangeable. 
+ // Reuse of crf below is only intended to work in VideoEncoder tests where + // crf = 0 + avCodecContext_->global_quality = + FF_QP2LAMBDA * videoStreamOptions.crf.value(); } int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); av_dict_free(&options); @@ -694,7 +709,7 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame( outWidth_, outHeight_, outPixelFormat_, - SWS_BILINEAR, + SWS_BICUBIC, // Used by FFmpeg CLI nullptr, nullptr, nullptr)); diff --git a/test/test_ops.py b/test/test_ops.py index a4ec6a6be..2e54b2942 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -9,7 +9,13 @@ import os from functools import partial -from .utils import get_ffmpeg_major_version, in_fbcode, IS_WINDOWS +from .utils import ( + assert_tensor_close_on_at_least, + get_ffmpeg_major_version, + in_fbcode, + IS_WINDOWS, + TEST_SRC_2_720P, +) os.environ["TORCH_LOGS"] = "output_code" import json @@ -1383,7 +1389,7 @@ def decode(self, file_path) -> torch.Tensor: return frames @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) - def test_video_encoder_test_round_trip(self, tmp_path, format): + def test_video_encoder_round_trip(self, tmp_path, format): ffmpeg_version = get_ffmpeg_major_version() if format == "webm": if ffmpeg_version == 4: @@ -1394,7 +1400,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): pytest.skip( "Codec for webm is not available in the FFmpeg6/7 installation on Windows." ) - asset = NASA_VIDEO + asset = TEST_SRC_2_720P # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data @@ -1404,20 +1410,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format): frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 ) round_trip_frames = self.decode(encoded_path).data - - # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed. - # Converting to the output format may perform chroma subsampling. 
- # Other times, no conversion between YUV and RGB is required. + assert ( + source_frames.shape == round_trip_frames.shape + ), f"Shape mismatch: source {source_frames.shape} vs round_trip {round_trip_frames.shape}" + assert ( + source_frames.dtype == round_trip_frames.dtype + ), f"Dtype mismatch: source {source_frames.dtype} vs round_trip {round_trip_frames.dtype}" + + # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels + # are within a higher tolerance. if ffmpeg_version == 6 or format in ("avi", "flv"): - atol = 55 + assert_close = partial(assert_tensor_close_on_at_least, percentage=99) + atol = 15 else: + assert_close = torch.testing.assert_close atol = 2 - # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video # Check that PSNR for decode(encode(samples)) is above 30 for s_frame, rt_frame in zip(source_frames, round_trip_frames): res = psnr(s_frame, rt_frame) assert res > 30 - torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0) + assert_close(s_frame, rt_frame, atol=atol, rtol=0) + + @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") + @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) + def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ffmpeg_version == 4: + pytest.skip("Codec for webm is not available in the FFmpeg4 installation.") + asset = TEST_SRC_2_720P + source_frames = self.decode(str(asset.path)).data + frame_rate = 30 + + # Encode with FFmpeg CLI + temp_raw_path = str(tmp_path / "temp_input.raw") + with open(temp_raw_path, "wb") as f: + f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) + + ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") + # Test that lossless encoding is identical + crf = 0 + quality_params = ["-crf", str(crf)] + # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding. 
+ # Flags not supported by the selected codec will be ignored, so we set both crf and qscale. + quality_params += ["-q:v", str(crf)] + ffmpeg_cmd = [ + "ffmpeg", + "-y", + "-f", + "rawvideo", + "-pix_fmt", + "rgb24", + "-s", + f"{source_frames.shape[3]}x{source_frames.shape[2]}", + "-r", + str(frame_rate), + "-i", + temp_raw_path, + *quality_params, + ffmpeg_encoded_path, + ] + subprocess.run(ffmpeg_cmd, check=True) + + # Encode with our video encoder + encoder_output_path = str(tmp_path / f"encoder_output.{format}") + encode_video_to_file( + frames=source_frames, + frame_rate=frame_rate, + filename=encoder_output_path, + crf=crf, + ) + + ffmpeg_frames = self.decode(ffmpeg_encoded_path).data + encoder_frames = self.decode(encoder_output_path).data + + assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] + + # If FFmpeg selects a codec or pixel format that uses qscale (not crf), + # the VideoEncoder outputs *slightly* different frames. + # There may be additional subtle differences in the encoder. 
+ percentage = 97 if ffmpeg_version == 6 or format in ("avi") else 99 + + # Check that PSNR between both encoded versions is high + for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): + res = psnr(ff_frame, enc_frame) + assert res > 30 + assert_tensor_close_on_at_least( + ff_frame, enc_frame, percentage=percentage, atol=2 + ) if __name__ == "__main__": From 20552910231c0baeb74a699b2b57aa6ce9b21e2a Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 6 Oct 2025 11:39:01 -0400 Subject: [PATCH 17/28] skip webm/windows/ffmpeg6,7 --- src/torchcodec/_core/Encoder.cpp | 1 - test/test_ops.py | 26 +++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 80682b76f..e3a24911b 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -583,7 +583,6 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { - av_log_set_level(AV_LOG_VERBOSE); const AVCodec* avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); TORCH_CHECK(avCodec != nullptr, "Video codec not found"); diff --git a/test/test_ops.py b/test/test_ops.py index 2e54b2942..8d912f895 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -9,14 +9,6 @@ import os from functools import partial -from .utils import ( - assert_tensor_close_on_at_least, - get_ffmpeg_major_version, - in_fbcode, - IS_WINDOWS, - TEST_SRC_2_720P, -) - os.environ["TORCH_LOGS"] = "output_code" import json import subprocess @@ -53,6 +45,10 @@ from .utils import ( all_supported_devices, assert_frames_equal, + assert_tensor_close_on_at_least, + get_ffmpeg_major_version, + in_fbcode, + IS_WINDOWS, NASA_AUDIO, NASA_AUDIO_MP3, NASA_VIDEO, @@ -61,8 +57,8 @@ SINE_MONO_S32, SINE_MONO_S32_44100, SINE_MONO_S32_8000, + TEST_SRC_2_720P, unsplit_device_str, - TESTSRC2_VIDEO, ) torch._dynamo.config.capture_dynamic_output_shape_ops = True @@ 
-1431,12 +1427,20 @@ def test_video_encoder_round_trip(self, tmp_path, format): assert res > 30 assert_close(s_frame, rt_frame, atol=atol, rtol=0) + @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ffmpeg_version == 4: - pytest.skip("Codec for webm is not available in the FFmpeg4 installation.") + if format == "webm": + if ffmpeg_version == 4: + pytest.skip( + "Codec for webm is not available in the FFmpeg4 installation." + ) + if IS_WINDOWS and ffmpeg_version in (6, 7): + pytest.skip( + "Codec for webm is not available in the FFmpeg6/7 installation on Windows." + ) asset = TEST_SRC_2_720P source_frames = self.decode(str(asset.path)).data frame_rate = 30 From e0e456c02eeab324987bd7422356a9264a5fe60a Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 6 Oct 2025 14:20:37 -0400 Subject: [PATCH 18/28] test gif against ffmpeg cli --- test/test_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index 8d912f895..8aa8aa678 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1429,7 +1429,9 @@ def test_video_encoder_round_trip(self, tmp_path, format): @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") - @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) + @pytest.mark.parametrize( + "format", ("mov", "mp4", "avi", "mkv", "webm", "flv", "gif") + ) def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): ffmpeg_version = get_ffmpeg_major_version() if format == "webm": From d2b2f14c06a6fce430c89332a4b75409d9a9ff30 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 6 Oct 2025 14:21:41 
-0400 Subject: [PATCH 19/28] more sensible qscale lower bound --- src/torchcodec/_core/Encoder.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index e3a24911b..b8739e8fb 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -640,11 +640,12 @@ void VideoEncoder::initializeEncoder( // For codecs that don't support CRF (mpeg4, flv1), // use quality-based encoding via global_quality + qscale flag avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE; - // While qscale is similar to crf, it is likely not interchangeable. - // Reuse of crf below is only intended to work in VideoEncoder tests where - // crf = 0 - avCodecContext_->global_quality = - FF_QP2LAMBDA * videoStreamOptions.crf.value(); + // Reuse of crf below is only intended to work in tests where crf = 0 + // Use qmin as lower bound for best possible quality + int qp = videoStreamOptions.crf.value() <= avCodecContext_->qmin + ? 
avCodecContext_->qmin + : videoStreamOptions.crf.value(); + avCodecContext_->global_quality = FF_QP2LAMBDA * qp; } int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); av_dict_free(&options); From d7bb78687f102ee6a6cf13a35bd34b4b62506165 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 10 Oct 2025 14:15:53 -0400 Subject: [PATCH 20/28] incorporate comment suggestions in test_ops --- test/test_ops.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 8aa8aa678..036ebf191 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1386,6 +1386,7 @@ def decode(self, file_path) -> torch.Tensor: @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) def test_video_encoder_round_trip(self, tmp_path, format): + # Test that decode(encode(decode(asset))) == decode(asset) ffmpeg_version = get_ffmpeg_major_version() if format == "webm": if ffmpeg_version == 4: @@ -1397,7 +1398,6 @@ def test_video_encoder_round_trip(self, tmp_path, format): "Codec for webm is not available in the FFmpeg6/7 installation on Windows." 
) asset = TEST_SRC_2_720P - # Test that decode(encode(decode(asset))) == decode(asset) source_frames = self.decode(str(asset.path)).data encoded_path = str(tmp_path / f"encoder_output.{format}") @@ -1406,12 +1406,8 @@ def test_video_encoder_round_trip(self, tmp_path, format): frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 ) round_trip_frames = self.decode(encoded_path).data - assert ( - source_frames.shape == round_trip_frames.shape - ), f"Shape mismatch: source {source_frames.shape} vs round_trip {round_trip_frames.shape}" - assert ( - source_frames.dtype == round_trip_frames.dtype - ), f"Dtype mismatch: source {source_frames.dtype} vs round_trip {round_trip_frames.dtype}" + assert source_frames.shape == round_trip_frames.shape + assert source_frames.dtype == round_trip_frames.dtype # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels # are within a higher tolerance. @@ -1421,13 +1417,10 @@ def test_video_encoder_round_trip(self, tmp_path, format): else: assert_close = torch.testing.assert_close atol = 2 - # Check that PSNR for decode(encode(samples)) is above 30 for s_frame, rt_frame in zip(source_frames, round_trip_frames): - res = psnr(s_frame, rt_frame) - assert res > 30 + assert psnr(s_frame, rt_frame) > 30 assert_close(s_frame, rt_frame, atol=atol, rtol=0) - @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") @pytest.mark.parametrize( "format", ("mov", "mp4", "avi", "mkv", "webm", "flv", "gif") @@ -1453,7 +1446,6 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") - # Test that lossless encoding is identical crf = 0 quality_params = ["-crf", str(crf)] # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding. 
From 266f9f586adf124eff34181b9a73821dddfa3bc0 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 10 Oct 2025 15:15:55 -0400 Subject: [PATCH 21/28] move torchaudio comment, remove streamIndex_ --- src/torchcodec/_core/Encoder.cpp | 10 +++++----- src/torchcodec/_core/Encoder.h | 1 - src/torchcodec/_core/FFMPEGCommon.cpp | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index b8739e8fb..c8e17d3d4 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -666,7 +666,6 @@ void VideoEncoder::initializeEncoder( status == AVSUCCESS, "avcodec_parameters_from_context failed: ", getFFMPEGErrorStringFromErrorCode(status)); - streamIndex_ = avStream_->index; } void VideoEncoder::encode() { @@ -791,15 +790,16 @@ void VideoEncoder::encodeFrame( "Error receiving packet: ", getFFMPEGErrorStringFromErrorCode(status)); + // The code below is borrowed from torchaudio: + // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46 + // Setting packet->duration to 1 allows the last frame to be properly + // encoded, and needs to be set before calling av_packet_rescale_ts. if (packet->duration == 0) { packet->duration = 1; } - // av_packet_rescale_ts ensures encoded frames have correct timestamps. 
- // This prevents "no more frames" errors when decoding encoded frames, - // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46 av_packet_rescale_ts( packet.get(), avCodecContext_->time_base, avStream_->time_base); - packet->stream_index = streamIndex_; + packet->stream_index = avStream_->index; status = av_interleaved_write_frame(avFormatContext_.get(), packet.get()); TORCH_CHECK( diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index 14d5fca2e..62d30a624 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -153,7 +153,6 @@ class VideoEncoder { UniqueEncodingAVFormatContext avFormatContext_; UniqueAVCodecContext avCodecContext_; - int streamIndex_ = -1; AVStream* avStream_; UniqueSwsContext swsContext_; diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp index f6247208c..0570f06cf 100644 --- a/src/torchcodec/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/_core/FFMPEGCommon.cpp @@ -92,7 +92,7 @@ const int* getSupportedSampleRates(const AVCodec& avCodec) { const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec) { const AVPixelFormat* supportedPixelFormats = nullptr; -#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1 int numPixelFormats = 0; int ret = avcodec_get_supported_config( nullptr, From 4516b3508cb64b2a106fb8cf590ac4da9cd14d7d Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 10 Oct 2025 15:34:31 -0400 Subject: [PATCH 22/28] remove qscale, adjust tests --- src/torchcodec/_core/Encoder.cpp | 13 +------------ test/test_ops.py | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index c8e17d3d4..5cc41c43e 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ 
b/src/torchcodec/_core/Encoder.cpp @@ -629,23 +629,12 @@ void VideoEncoder::initializeEncoder( // Apply videoStreamOptions AVDictionary* options = nullptr; - if (videoStreamOptions.crf.has_value() && - (avCodec->id != AV_CODEC_ID_MPEG4 && avCodec->id != AV_CODEC_ID_FLV1)) { + if (videoStreamOptions.crf.has_value()) { av_dict_set( &options, "crf", std::to_string(videoStreamOptions.crf.value()).c_str(), 0); - } else { - // For codecs that don't support CRF (mpeg4, flv1), - // use quality-based encoding via global_quality + qscale flag - avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE; - // Reuse of crf below is only intended to work in tests where crf = 0 - // Use qmin as lower bound for best possible quality - int qp = videoStreamOptions.crf.value() <= avCodecContext_->qmin - ? avCodecContext_->qmin - : videoStreamOptions.crf.value(); - avCodecContext_->global_quality = FF_QP2LAMBDA * qp; } int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); av_dict_free(&options); diff --git a/test/test_ops.py b/test/test_ops.py index 036ebf191..6fe9d0410 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1384,19 +1384,20 @@ def decode(self, file_path) -> torch.Tensor: frames, *_ = get_frames_in_range(decoder, start=0, stop=60) return frames - @pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv")) + @pytest.mark.parametrize("format", ("mov", "mp4", "mkv", "webm")) def test_video_encoder_round_trip(self, tmp_path, format): # Test that decode(encode(decode(asset))) == decode(asset) ffmpeg_version = get_ffmpeg_major_version() - if format == "webm": - if ffmpeg_version == 4: - pytest.skip( - "Codec for webm is not available in the FFmpeg4 installation." - ) - if IS_WINDOWS and ffmpeg_version in (6, 7): - pytest.skip( - "Codec for webm is not available in the FFmpeg6/7 installation on Windows." - ) + # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. 
+ # As a result, we skip the round trip test. + if ffmpeg_version == 6 and format != "webm": + pytest.skip( + f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." + ) + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") asset = TEST_SRC_2_720P source_frames = self.decode(str(asset.path)).data @@ -1448,9 +1449,8 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") crf = 0 quality_params = ["-crf", str(crf)] - # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding. - # Flags not supported by the selected codec will be ignored, so we set both crf and qscale. - quality_params += ["-q:v", str(crf)] + # Some codecs (ex. MPEG4) do not support CRF. + # Flags not supported by the selected codec will be ignored. ffmpeg_cmd = [ "ffmpeg", "-y", @@ -1486,7 +1486,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): # If FFmpeg selects a codec or pixel format that uses qscale (not crf), # the VideoEncoder outputs *slightly* different frames. # There may be additional subtle differences in the encoder. 
- percentage = 97 if ffmpeg_version == 6 or format in ("avi") else 99 + percentage = 95 if ffmpeg_version == 6 or format in ("avi") else 99 # Check that PSNR between both encoded versions is high for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): From 163c5c2d4aa622ef34b90f44c50c7d902dc0db0d Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Sun, 12 Oct 2025 22:36:33 -0400 Subject: [PATCH 23/28] specify cpu in tests --- test/test_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 6fe9d0410..8063c459f 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1378,9 +1378,9 @@ def test_bad_input(self, tmp_path): filename="./bad/path.mp3", ) - def decode(self, file_path) -> torch.Tensor: + def decode(self, file_path, device="cpu") -> torch.Tensor: decoder = create_from_file(str(file_path), seek_mode="approximate") - add_video_stream(decoder) + add_video_stream(decoder, device=device) frames, *_ = get_frames_in_range(decoder, start=0, stop=60) return frames @@ -1399,14 +1399,14 @@ def test_video_encoder_round_trip(self, tmp_path, format): ): pytest.skip("Codec for webm is not available in this FFmpeg installation.") asset = TEST_SRC_2_720P - source_frames = self.decode(str(asset.path)).data + source_frames = self.decode(str(asset.path), device="cpu").data encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded encode_video_to_file( frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 ) - round_trip_frames = self.decode(encoded_path).data + round_trip_frames = self.decode(encoded_path, device="cpu").data assert source_frames.shape == round_trip_frames.shape assert source_frames.dtype == round_trip_frames.dtype From d4017bd4015508598575e55ea1b408d2f41920c7 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 13 Oct 2025 00:36:46 -0400 Subject: [PATCH 24/28] remove outdated test condition --- 
test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index 8063c459f..2c6f2c7fe 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1412,7 +1412,7 @@ def test_video_encoder_round_trip(self, tmp_path, format): # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels # are within a higher tolerance. - if ffmpeg_version == 6 or format in ("avi", "flv"): + if ffmpeg_version == 6: assert_close = partial(assert_tensor_close_on_at_least, percentage=99) atol = 15 else: From 185c656e94c58c289f1fa12adf9a59f40cf2a438 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 13 Oct 2025 11:14:43 -0400 Subject: [PATCH 25/28] decrement cli accuracy requirement for avi --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index 2c6f2c7fe..77dcac07a 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1486,7 +1486,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): # If FFmpeg selects a codec or pixel format that uses qscale (not crf), # the VideoEncoder outputs *slightly* different frames. # There may be additional subtle differences in the encoder. - percentage = 95 if ffmpeg_version == 6 or format in ("avi") else 99 + percentage = 94 if ffmpeg_version == 6 or format in ("avi") else 99 # Check that PSNR between both encoded versions is high for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): From 4cc2eb3abda2a7d2b0385a343b13787898dca04c Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 13 Oct 2025 13:23:00 -0400 Subject: [PATCH 26/28] Revert "specify cpu in tests" This reverts commit 163c5c2d4aa622ef34b90f44c50c7d902dc0db0d. 
--- test/test_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 77dcac07a..efd5d356b 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1378,9 +1378,9 @@ def test_bad_input(self, tmp_path): filename="./bad/path.mp3", ) - def decode(self, file_path, device="cpu") -> torch.Tensor: + def decode(self, file_path) -> torch.Tensor: decoder = create_from_file(str(file_path), seek_mode="approximate") - add_video_stream(decoder, device=device) + add_video_stream(decoder) frames, *_ = get_frames_in_range(decoder, start=0, stop=60) return frames @@ -1399,14 +1399,14 @@ def test_video_encoder_round_trip(self, tmp_path, format): ): pytest.skip("Codec for webm is not available in this FFmpeg installation.") asset = TEST_SRC_2_720P - source_frames = self.decode(str(asset.path), device="cpu").data + source_frames = self.decode(str(asset.path)).data encoded_path = str(tmp_path / f"encoder_output.{format}") frame_rate = 30 # Frame rate is fixed with num frames decoded encode_video_to_file( frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 ) - round_trip_frames = self.decode(encoded_path, device="cpu").data + round_trip_frames = self.decode(encoded_path).data assert source_frames.shape == round_trip_frames.shape assert source_frames.dtype == round_trip_frames.dtype From b13b5db2349aee523d505cf83c8dcebc73573ddc Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 13 Oct 2025 13:27:51 -0400 Subject: [PATCH 27/28] replace in with == --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index efd5d356b..fddd4043c 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1486,7 +1486,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): # If FFmpeg selects a codec or pixel format that uses qscale (not crf), # the VideoEncoder outputs *slightly* different frames. 
# There may be additional subtle differences in the encoder. - percentage = 94 if ffmpeg_version == 6 or format in ("avi") else 99 + percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 # Check that PSNR between both encoded versions is high for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): From 160d4496c944f2c165af7772f1ca26a3c1205944 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Mon, 13 Oct 2025 15:35:35 -0400 Subject: [PATCH 28/28] use nullptr instead of 0 --- src/torchcodec/_core/Encoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 5cc41c43e..14ef1cb94 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -609,7 +609,7 @@ void VideoEncoder::initializeEncoder( getSupportedPixelFormats(*avCodec), // List of supported formats AV_PIX_FMT_GBRP, // We reorder input to GBRP currently 0, // No alpha channel - 0 // Discard conversion loss information + nullptr // Discard conversion loss information ); TORCH_CHECK(outPixelFormat_ != -1, "Failed to find best pix fmt")