From 42bb016251ffd1c5bdafa61d08c7ddd3ddcca3b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Enrique=20Oca=C3=B1a=20Gonz=C3=A1lez?=
Date: Thu, 18 Dec 2025 20:51:25 +0100
Subject: [PATCH] [ONEM-41937] Fix sample DTS to prevent deletion of
 preexisting samples when PTS doesn't conflict
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- see the original description from https://github.com/WebKit/WebKit/pull/57363 below
- this also reverts commit 90c814dcca2255f0f1c4f4a23d27ec5c203ecf04 (disable
  MP4 edit-lists support)

[EXPERIMENTAL][MSE] Fix sample DTS to prevent deletion of preexisting samples
when PTS doesn't conflict

Let's consider preexisting samples A, B, C and a new sample N with the
following timestamps:

           Decoding time    Presentation time
           -------------    -----------------
Sample A:  827.826164000    827.826164000
Sample B:  827.826164100    827.826164100
Sample C:  827.826164200    827.826164200
Sample N:  827.826125000    827.909542000

These disparities between the DTS/PTS offsets of samples A, B, C and the
offset of sample N exist because the samples come from completely different
videos (e.g. because of ad insertion). In a timeline:

DTS ···NABC·······
PTS ····ABC···N···

Judging by PTS, the samples A, B, C should remain and theoretically don't
need to be erased. However, considering DTS, the samples A, B, C have to be
erased, because sample N would reach the video decoder earlier (in decoding
order) and change the state of the decoder, making it unsuitable for samples
A, B, C.

This commit tries to fix the situation by changing the DTS of sample N (the
new sample) to a value slightly higher than the DTS of sample C, like this:

           Decoding time      Presentation time
           -------------      -----------------
Sample A:   827.826164000     827.826164000
Sample B:   827.826164100     827.826164100
Sample C:   827.826164200     827.826164200
Sample N:  *827.826164201*    827.909542000

DTS ····ABCN······
PTS ····ABC···N···

This fix avoids the deletion of samples A, B, C and the potential problems
associated with that.
---
 .../platform/graphics/SourceBufferPrivate.cpp | 45 ++++++++++++++-----
 .../graphics/gstreamer/mse/AppendPipeline.cpp |  7 +--
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/Source/WebCore/platform/graphics/SourceBufferPrivate.cpp b/Source/WebCore/platform/graphics/SourceBufferPrivate.cpp
index 49a6a90fd518e..07ddc085533f6 100644
--- a/Source/WebCore/platform/graphics/SourceBufferPrivate.cpp
+++ b/Source/WebCore/platform/graphics/SourceBufferPrivate.cpp
@@ -932,6 +932,14 @@ void SourceBufferPrivate::didReceiveSample(Ref<MediaSample>&& originalSample)
         erasedSamples.addRange(iterPair.first, iterPair.second);
     }
 
+    // There are many files out there where the frame times are not perfectly contiguous and may have small overlaps
+    // between the beginning of a frame and the end of the previous one; therefore a tolerance is needed whenever
+    // durations are considered.
+    // For instance, most WebM files are muxed rounded to the millisecond (the default TimecodeScale of the format)
+    // but their durations use a finer timescale (causing a sub-millisecond overlap). More rarely, there are also
+    // MP4 files with slightly off tfdt boxes, presenting a similar problem at the beginning of each fragment.
+    const MediaTime contiguousFrameTolerance = MediaTime(1, 1000);
+
     // When appending media containing B-frames (media whose samples' presentation timestamps
     // do not increase monotonically, the prior erase steps could leave samples in the trackBuffer
     // which will be disconnected from its previous I-frame. If the incoming frame is an I-frame,
@@ -954,19 +962,36 @@ void SourceBufferPrivate::didReceiveSample(Ref<MediaSample>&& originalSample)
             while (nextSyncSample != trackBuffer.samples().decodeOrder().end() && nextSyncSample->second->presentationTime() <= sample->presentationTime())
                 nextSyncSample = trackBuffer.samples().decodeOrder().findSyncSampleAfterDecodeIterator(nextSyncSample);
 
-            INFO_LOG(LOGIDENTIFIER, "Discovered out-of-order frames, from: ", *nextSampleInDecodeOrder->second.get(), " to: ", (nextSyncSample == trackBuffer.samples().decodeOrder().end() ? "[end]"_s : toString(*nextSyncSample->second.get())));
+            if (nextSampleInDecodeOrder->second->presentationTime() < sample->presentationTime()) {
+                // Try to fix the out-of-order condition by placing the decoding timestamp of sample after the decoding timestamp
+                // of the last preexisting sample before the next sync sample which has a presentationTime lower than sample.
+                auto lastSampleToErase = trackBuffer.samples().decodeOrder().rbegin();
+                if (nextSyncSample != trackBuffer.samples().decodeOrder().end()) {
+                    lastSampleToErase = trackBuffer.samples().decodeOrder().reverseFindSampleWithDecodeKey(nextSyncSample->first);
+                    lastSampleToErase++;
+                }
+
+                if (lastSampleToErase != trackBuffer.samples().decodeOrder().rend()) {
+                    if (lastSampleToErase->second->presentationTime() < sample->presentationTime()) {
+                        const MediaTime epsilon = MediaTime(100, 1000000); // 100 µs.
+                        auto safeDecodeTime = lastSampleToErase->second->decodeTime() + epsilon;
+                        if (safeDecodeTime > sample->decodeTime()
+                            && safeDecodeTime < (sample->decodeTime() + sample->duration() - contiguousFrameTolerance - contiguousFrameTolerance)) {
+                            INFO_LOG(LOGIDENTIFIER, "Discovered out-of-order frames, from: ", *nextSampleInDecodeOrder->second, " to: ", (nextSyncSample == trackBuffer.samples().decodeOrder().end() ? "[end]"_s : toString(*nextSyncSample->second.get())),
+                                ", but fixed the ordering by changing sample DTS from ", sample->decodeTime(), " to ", safeDecodeTime);
+                            sample->setTimestamps(sample->presentationTime(), safeDecodeTime);
+                            break;
+                        }
+                    }
+                }
+            }
+
+            INFO_LOG(LOGIDENTIFIER, "Discovered out-of-order frames, from: ", *nextSampleInDecodeOrder->second,
+                " to: ", (nextSyncSample == trackBuffer.samples().decodeOrder().end() ? "[end]"_s : toString(*nextSyncSample->second.get())),
+                ", erasing them");
             erasedSamples.addRange(nextSampleInDecodeOrder, nextSyncSample);
         } while (false);
 
-        // There are many files out there where the frame times are not perfectly contiguous and may have small overlaps
-        // between the beginning of a frame and the end of the previous one; therefore a tolerance is needed whenever
-        // durations are considered.
-        // For instance, most WebM files are muxed rounded to the millisecond (the default TimecodeScale of the format)
-        // but their durations use a finer timescale (causing a sub-millisecond overlap). More rarely, there are also
-        // MP4 files with slightly off tfdt boxes, presenting a similar problem at the beginning of each fragment.
-        // Same as tolerance in SourceBuffer::canPlayThroughRange().
-        const MediaTime contiguousFrameTolerance = MediaTime(1, 1000);
-
         // If highest presentation timestamp for track buffer is set and less than or equal to presentation timestamp
         if (trackBuffer.highestPresentationTimestamp().isValid() && trackBuffer.highestPresentationTimestamp() - contiguousFrameTolerance <= presentationTimestamp) {
             // Remove all coded frames from track buffer that have a presentation timestamp greater than highest
diff --git a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp
index 226af18e9e42e..6c28f287be51c 100644
--- a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp
+++ b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp
@@ -468,20 +468,15 @@ void AppendPipeline::appsinkNewSample(const Track& track, GRefPtr<GstSample>&& sample)
     GstSegment* segment = gst_sample_get_segment(sample.get());
 
     auto mediaSample = MediaSampleGStreamer::create(WTFMove(sample), track.presentationSize, track.trackId);
-    /* this logic does not work when we insert piece of stream with different segment start
-       and then return back to original offset. It causes the inserted segment is shifted/overlapped.
-
     if (segment && (segment->time || segment->start)) {
         // MP4 has the concept of edit lists, where some buffer time needs to be offsetted, often very slightly,
         // to get exact timestamps.
-
         MediaTime pts = bufferTimeToStreamTime(segment, GST_BUFFER_PTS(buffer));
         MediaTime dts = bufferTimeToStreamTime(segment, GST_BUFFER_DTS(buffer));
         GST_TRACE_OBJECT(track.appsinkPad.get(), "Mapped buffer to segment, PTS %" GST_TIME_FORMAT " -> %s DTS %" GST_TIME_FORMAT " -> %s",
             GST_TIME_ARGS(GST_BUFFER_PTS(buffer)), pts.toString().utf8().data(), GST_TIME_ARGS(GST_BUFFER_DTS(buffer)), dts.toString().utf8().data());
         mediaSample->setTimestamps(pts, dts);
-
-    } else */ if (!GST_BUFFER_DTS(buffer) && GST_BUFFER_PTS(buffer) > 0 && GST_BUFFER_PTS(buffer) <= 100'000'000) {
+    } else if (!GST_BUFFER_DTS(buffer) && GST_BUFFER_PTS(buffer) > 0 && GST_BUFFER_PTS(buffer) <= 100'000'000) {
         // Because a track presentation time starting at some close to zero, but not exactly zero time can cause unexpected
         // results for applications, we extend the duration of this first sample to the left so that it starts at zero.
         // This is relevant for files that should have an edit list but don't, or when using GStreamer < 1.16, where
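
Review note: the core of the SourceBufferPrivate.cpp change above can be
modeled in isolation. The sketch below is a hypothetical illustration, not
WebKit code: the Sample struct, the tryFixDecodeTime() helper, plain double
seconds in place of WebCore::MediaTime, and the 0.04 s durations are invented
here; only the 1 ms tolerance, the 100 µs epsilon, and the A/B/C/N timestamps
come from the patch and its commit message. It also assumes no sync sample
follows the buffered range (the real patch handles that case via
nextSyncSample).

// Standalone model of the DTS-adjustment idea from the commit message.
#include <cstdio>
#include <optional>
#include <vector>

struct Sample {
    double dts;      // decode timestamp, in seconds
    double pts;      // presentation timestamp, in seconds
    double duration; // in seconds
};

// If the incoming sample's PTS sorts after every buffered sample but its DTS
// would sort before some of them, return a "safe" DTS just past the last
// buffered DTS, provided the bump stays well inside the incoming sample's own
// duration. std::nullopt means the caller must fall back to erasing.
std::optional<double> tryFixDecodeTime(const std::vector<Sample>& buffered, const Sample& incoming)
{
    if (buffered.empty())
        return std::nullopt;

    const double tolerance = 0.001; // 1 ms, mirrors contiguousFrameTolerance.
    const double epsilon = 0.0001;  // 100 µs, mirrors the patch's epsilon.

    const Sample& last = buffered.back(); // last buffered sample in decode order
    if (last.pts >= incoming.pts)
        return std::nullopt; // Real PTS conflict: adjusting the DTS cannot help.

    double safeDts = last.dts + epsilon;
    // Only move the DTS forward, and keep it clear of the end of the incoming
    // sample by twice the tolerance, as the patch does.
    if (safeDts > incoming.dts && safeDts < incoming.dts + incoming.duration - 2 * tolerance)
        return safeDts;
    return std::nullopt;
}

int main()
{
    // The A, B, C, N timestamps from the commit message; the 0.04 s durations
    // are invented so the range check has something to work with.
    std::vector<Sample> buffered = {
        { 827.826164000, 827.826164000, 0.04 },
        { 827.826164100, 827.826164100, 0.04 },
        { 827.826164200, 827.826164200, 0.04 },
    };
    Sample n = { 827.826125000, 827.909542000, 0.04 };

    if (auto safeDts = tryFixDecodeTime(buffered, n))
        std::printf("bumped N's DTS from %.9f to %.9f\n", n.dts, *safeDts);
    else
        std::printf("no safe DTS; preexisting samples would be erased\n");
}

With these inputs tryFixDecodeTime() returns approximately 827.826264200
(C's DTS plus 100 µs), so N keeps its PTS but now decodes after C and the
preexisting samples survive; the std::nullopt branch corresponds to the patch
falling through to the erasedSamples.addRange() path.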