livepeer · stronk-dev · Oct 15, 2025 · Oct 15, 2025
diff --git a/ffmpeg/decoder.h b/ffmpeg/decoder.h
@@ -55,6 +55,12 @@ struct input_ctx {
   // In HW transcoding, demuxer is opened once and used,
   // so it is necessary to check whether the input pixel format does not change in the middle.
   enum AVPixelFormat last_format;
+
+  // per-segment video timeline tracking; transcode() resets all four at the start of each segment
+  int64_t segment_first_pts;       // pts of first decoded video frame with a usable timestamp; AV_NOPTS_VALUE until one is seen
+  int64_t segment_last_pts;        // pts of most recent decoded video frame with a usable timestamp; AV_NOPTS_VALUE until one is seen
+  int64_t segment_accum_duration;  // sum of decoded video frame durations (input stream timebase)
+  int segment_pts_samples;         // number of decoded video frames counted this segment, including frames without a timestamp
 };
 
 // Exported methods

diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c
@@ -173,6 +173,10 @@ void free_output(struct output_ctx *octx)
   free_filter(&octx->vf);
   free_filter(&octx->af);
   free_filter(&octx->sf);
+  // Clear every per-segment guard field, mirroring the reset done in transcode().
+  octx->segment_first_output_pts = octx->segment_last_output_pts = AV_NOPTS_VALUE;
+  octx->segment_accum_output_duration = octx->guard_target_frame_duration = 0;
+  octx->guard_has_target_fps = 0;
 }
 
 int open_remux_output(struct input_ctx *ictx, struct output_ctx *octx)
@@ -422,6 +426,18 @@ int encode(AVCodecContext* encoder, AVFrame *frame, struct output_ctx* octx, AVS
     }
     octx->res->frames++;
     octx->res->pixels += encoder->width * encoder->height;
+
+    if (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type) {
+      // Remember the output pts range; a frame without a pts leaves it untouched.
+      const int64_t out_pts = frame ? frame->pts : AV_NOPTS_VALUE;
+      if (out_pts != AV_NOPTS_VALUE) {
+        if (octx->segment_first_output_pts == AV_NOPTS_VALUE) octx->segment_first_output_pts = out_pts;
+        octx->segment_last_output_pts = out_pts;
+      }
+      // Advance by the frame duration, else the cached target duration, else one tick.
+      int64_t advance = (frame && frame->duration) ? frame->duration : octx->guard_target_frame_duration;
+      octx->segment_accum_output_duration += (advance > 0) ? advance : 1;
+    }
   }
 
   // We don't want to send NULL frames for HW encoding
@@ -625,6 +641,61 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
         }
       }
 
+      if (is_video && frame) {
+        // Guard checks keep the encoded timeline bounded by the source timeline.
+        // `segment_first_pts`/`segment_last_pts`/`segment_accum_duration` describe
+        // the video decoded so far this segment, while `segment_accum_output_duration`
+        // and `res->frames` are advanced by encode(). Both guards are skipped while
+        // the input duration is still unknown (zero), so input without usable
+        // timestamps is never rejected.
+        AVRational in_tb = ictx->ic->streams[ictx->vi]->time_base;
+        AVRational out_tb = encoder->time_base;
+
+        // Span between first and last decoded pts. AV_NOPTS_VALUE is a sentinel,
+        // not a timestamp, so it must never be fed to av_rescale_q().
+        int64_t implicit_dur = 0;
+        if (ictx->segment_first_pts != AV_NOPTS_VALUE &&
+            ictx->segment_last_pts != AV_NOPTS_VALUE) {
+          int64_t input_start = av_rescale_q(ictx->segment_first_pts, in_tb, out_tb);
+          int64_t input_end = av_rescale_q(ictx->segment_last_pts, in_tb, out_tb);
+          if (input_end > input_start) implicit_dur = input_end - input_start;
+        }
+        int64_t accum_dur = av_rescale_q(ictx->segment_accum_duration, in_tb, out_tb);
+        int64_t input_duration = FFMAX(implicit_dur, accum_dur);
+
+        if (input_duration > 0) {
+          int64_t duration_slack = input_duration / 10; // allow 10% slack
+          int64_t min_slack = av_rescale_q(100, (AVRational){1, 1000}, out_tb); // min 100ms in encoder timebase
+          if (duration_slack < min_slack) duration_slack = min_slack;
+
+          if (octx->segment_accum_output_duration > input_duration + duration_slack) {
+            // Would emit more timeline than the source contained – abort the segment.
+            return lpms_ERR_OUTPUT_GUARD_DURATION;
+          }
+        }
+
+        if (octx->guard_has_target_fps) {
+          // When this rendition has an explicit FPS, compute the expected
+          // frame budget from the same input duration. The cached
+          // `guard_target_frame_duration` is the encoder-timebase duration of
+          // a single target frame.
+          if (octx->guard_target_frame_duration <= 0) {
+            int64_t frame_dur = av_rescale_q(1, av_inv_q(octx->fps), out_tb);
+            if (frame_dur < 1) frame_dur = 1;
+            octx->guard_target_frame_duration = frame_dur;
+          }
+          int64_t frame_dur = octx->guard_target_frame_duration;
+          int64_t expected_frames = (input_duration + frame_dur / 2) / frame_dur;
+          int64_t frame_slack = expected_frames / 10; // allow 10% slack
+          if (frame_slack < 5) frame_slack = 5; // min 5 frames
+          // Like the duration guard, only enforce the budget once the input duration is known.
+          if (input_duration > 0 && octx->res->frames > expected_frames + frame_slack) {
+            // Fixed-fps outputs should never exceed the predicted frame budget.
+            return lpms_ERR_OUTPUT_GUARD_FRAME_BUDGET;
+          }
+        }
+      }
+
       ret = encode(encoder, frame, octx, ost);
 skip:
     av_frame_unref(frame);

diff --git a/ffmpeg/ffmpeg_errors.go b/ffmpeg/ffmpeg_errors.go
@@ -20,6 +20,8 @@ var lpmsErrors = []struct {
 	{Code: C.lpms_ERR_INPUT_CODEC, Desc: "Unsupported input codec"},
 	{Code: C.lpms_ERR_INPUT_NOKF, Desc: "No keyframes in input"},
 	{Code: C.lpms_ERR_UNRECOVERABLE, Desc: "Unrecoverable state, restart process"},
+	{Code: C.lpms_ERR_OUTPUT_GUARD_DURATION, Desc: "Output duration larger than input duration"},
+	{Code: C.lpms_ERR_OUTPUT_GUARD_FRAME_BUDGET, Desc: "Output frame count larger than expected"},
 }
 
 func error_map() map[int]error {

diff --git a/ffmpeg/filter.c b/ffmpeg/filter.c
@@ -7,6 +7,7 @@
 
 #include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
+#include <libavutil/avutil.h>
 
 #include <assert.h>
 
@@ -100,14 +101,16 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame
     }
 
     /* buffer video sink: to terminate the filter chain. */
-    ret = avfilter_graph_create_filter(&vf->sink_ctx, buffersink,
-                                       "out", NULL, NULL, vf->graph);
-    if (ret < 0) LPMS_ERR(vf_init_cleanup, "Cannot create video buffer sink");
-
+    vf->sink_ctx = avfilter_graph_alloc_filter(vf->graph, buffersink, "out");
+    if (!vf->sink_ctx) {
+      ret = AVERROR(ENOMEM);
+      LPMS_ERR(vf_init_cleanup, "Cannot allocate video buffer sink");
+    }
     ret = av_opt_set_int_list(vf->sink_ctx, "pix_fmts", pix_fmts,
                               AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
     if (ret < 0) LPMS_ERR(vf_init_cleanup, "Cannot set output pixel format");
-
+    ret = avfilter_init_str(vf->sink_ctx, NULL);
+    if (ret < 0) LPMS_ERR(vf_init_cleanup, "Cannot initialize video buffer sink");
     ret = filtergraph_parser(vf, filters_descr, &inputs, &outputs);
     if (ret < 0) LPMS_ERR(vf_init_cleanup, "Unable to parse video filters desc");
 
@@ -256,13 +259,16 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf)
     }
 
     /* buffer video sink: to terminate the filter chain. */
-    ret = avfilter_graph_create_filter(&sf->sink_ctx, buffersink,
-                                       "out", NULL, NULL, sf->graph);
-    if (ret < 0) LPMS_ERR(sf_init_cleanup, "Cannot create video buffer sink");
-
+    sf->sink_ctx = avfilter_graph_alloc_filter(sf->graph, buffersink, "out");
+    if (!sf->sink_ctx) {
+      ret = AVERROR(ENOMEM);
+      LPMS_ERR(sf_init_cleanup, "Cannot allocate signature buffer sink");
+    }
     ret = av_opt_set_int_list(sf->sink_ctx, "pix_fmts", pix_fmts,
                               AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
     if (ret < 0) LPMS_ERR(sf_init_cleanup, "Cannot set output pixel format");
+    ret = avfilter_init_str(sf->sink_ctx, NULL);
+    if (ret < 0) LPMS_ERR(sf_init_cleanup, "Cannot initialize signature buffer sink");
 
     ret = filtergraph_parser(sf, filters_descr, &inputs, &outputs);
     if (ret < 0) LPMS_ERR(sf_init_cleanup, "Unable to parse signature filters desc");

diff --git a/ffmpeg/filter.h b/ffmpeg/filter.h
@@ -81,6 +81,13 @@ struct output_ctx {
 
   output_results  *res; // data to return for this output
   char *xcoderParams;
+
+  int64_t segment_first_output_pts; // pts of the first video frame with a valid pts seen by encode() this segment; AV_NOPTS_VALUE until then
+  int64_t segment_last_output_pts; // pts of the most recent video frame with a valid pts seen by encode(); AV_NOPTS_VALUE until then
+  int64_t segment_accum_output_duration; // running sum of per-frame steps added in encode(); process_out() compares it to the input duration
+  int64_t guard_target_frame_duration; // cached duration of one frame in encoder timebase when fps is fixed
+  int guard_has_target_fps;            // non-zero when this rendition enforces a constant fps
+
 };
 
 int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame *inf);

diff --git a/ffmpeg/transcoder.c b/ffmpeg/transcoder.c
@@ -9,6 +9,7 @@
 #include <libavformat/avformat.h>
 #include <libavfilter/avfilter.h>
 #include <libavfilter/buffersrc.h>
+#include <libavutil/avutil.h>
 #include <stdbool.h>
 
 // Not great to appropriate internal API like this...
@@ -20,6 +21,8 @@ const int lpms_ERR_PACKET_ONLY = FFERRTAG('P','K','O','N');
 const int lpms_ERR_FILTER_FLUSHED = FFERRTAG('F','L','F','L');
 const int lpms_ERR_OUTPUTS = FFERRTAG('O','U','T','P');
 const int lpms_ERR_UNRECOVERABLE = FFERRTAG('U', 'N', 'R', 'V');
+const int lpms_ERR_OUTPUT_GUARD_DURATION = FFERRTAG('O', 'G', 'R', 'D'); // process_out: accumulated output duration exceeded input duration plus slack
+const int lpms_ERR_OUTPUT_GUARD_FRAME_BUDGET = FFERRTAG('O', 'G', 'R', 'F'); // process_out: fixed-fps output exceeded its expected frame count plus slack
 
 //
 //  Notes on transcoder internals:
@@ -326,6 +329,19 @@ int transcode(struct transcode_thread *h,
   int nb_outputs = h->nb_outputs;
   int outputs_ready = 0, hit_eof = 0;
 
+  // Per-segment guard state: start each segment with no input timeline recorded.
+  ictx->segment_first_pts = ictx->segment_last_pts = AV_NOPTS_VALUE;
+  ictx->segment_accum_duration = 0;
+  ictx->segment_pts_samples = 0;
+
+  for (int idx = 0; idx < nb_outputs; idx++) {
+    // A non-zero fps denominator marks a rendition with an explicit target fps.
+    outputs[idx].guard_has_target_fps = (0 != outputs[idx].fps.den);
+    outputs[idx].guard_target_frame_duration = 0; // recomputed lazily in process_out
+    outputs[idx].segment_accum_output_duration = 0;
+    outputs[idx].segment_first_output_pts = outputs[idx].segment_last_output_pts = AV_NOPTS_VALUE;
+  }
+
   ipkt = av_packet_alloc();
   if (!ipkt) LPMS_ERR(transcode_cleanup, "Unable to allocated packet");
   dframe = av_frame_alloc();
@@ -427,6 +443,22 @@ int transcode(struct transcode_thread *h,
       decoded_results->pixels += dframe->width * dframe->height;
       has_frame = has_frame && dframe->width && dframe->height;
       if (has_frame) last_frame = ictx->last_frame_v;
+      if (has_frame) {
+        // Track the input video timeline for this segment. Timestamp preference:
+        // frame pts, then best-effort timestamp, then last known pts + duration.
+        int64_t pts = dframe->pts;
+        if (pts == AV_NOPTS_VALUE) pts = dframe->best_effort_timestamp;
+        // Extrapolate only from a real timestamp: frames are counted even when
+        // they carry no pts, so the sample count alone does not prove one exists.
+        if (pts == AV_NOPTS_VALUE && ictx->segment_last_pts != AV_NOPTS_VALUE && dframe->duration)
+          pts = ictx->segment_last_pts + dframe->duration;
+        if (ictx->segment_first_pts == AV_NOPTS_VALUE && pts != AV_NOPTS_VALUE)
+          ictx->segment_first_pts = pts;
+        if (pts != AV_NOPTS_VALUE) ictx->segment_last_pts = pts;
+        ictx->segment_pts_samples++;
+        if (dframe->duration)
+          ictx->segment_accum_duration += dframe->duration;
+      }
     } else if (AVMEDIA_TYPE_AUDIO == ist->codecpar->codec_type) {
       has_frame = has_frame && dframe->nb_samples;
       if (has_frame) last_frame = ictx->last_frame_a;

diff --git a/ffmpeg/transcoder.h b/ffmpeg/transcoder.h
@@ -17,6 +17,8 @@ extern const int lpms_ERR_PACKET_ONLY;
 extern const int lpms_ERR_FILTER_FLUSHED;
 extern const int lpms_ERR_OUTPUTS;
 extern const int lpms_ERR_UNRECOVERABLE;
+extern const int lpms_ERR_OUTPUT_GUARD_DURATION; // output duration exceeded input duration plus slack
+extern const int lpms_ERR_OUTPUT_GUARD_FRAME_BUDGET; // fixed-fps output frame count exceeded the expected budget plus slack
 
 struct transcode_thread;