m1k1o · hheimbuerger · Jun 17, 2025 · Jun 17, 2025 · Jun 18, 2025 · Jun 19, 2025
diff --git a/README.md b/README.md
@@ -106,13 +106,43 @@ vod:
       width: 1920
       height: 1080
       bitrate: 5000
+    1080p_nvidia_gpu:
+      width: 1920
+      height: 1080
+      bitrate: 5000
+      # Optional ffmpeg video overrides
+      encoder: h264_nvenc   # default "libx264"
+      preset: p1          # default "faster"
+      profile: high       # default "high"
+      level: auto         # default "4.0"
+      extra-args:         # optional additional ffmpeg video encoder arguments
+        - "-tune:v=ull"   # either combined as "option=value" (split at the first "=") ...
+        - "-rc:v"         # ... or with the option ...
+        - "cbr"           # ... and its value as separate list items
+      # Optional filtergraph: read the input from the special `[vin]` pad and write the final result to `[vout]`
+      filtergraph:
+        - "[vin]split=2[v1][v2]"                # duplicate input
+        - "[v1]crop=iw/2:ih:0:0,hflip[left]"     # left half mirrored horizontally
+        - "[v2]crop=iw/2:ih:iw/2:0,vflip[right]" # right half flipped vertically
+        - "[left][right]hstack[vout]"            # join halves back together
+
+  # HLS-VOD segment behaviour (optional)
+  segment-length: 4        # nominal segment length in seconds
+  segment-offset: 1        # allowed +/- tolerance in seconds
+  segment-buffer-min: 3    # min segments ahead of playhead
+  segment-buffer-max: 5    # max segments transcoded at once
+
   # Use video keyframes as existing reference for chunks split
   # Using this might cause long probing times in order to get
-  # all keyframes - therefore they should be cached
+  # all keyframes - therefore they should be cached
   video-keyframes: false
   # Single audio profile used
   audio-profile:
-    bitrate: 192 # kbps
+    encoder: aac    # default "aac", but "copy" is an alternative
+    bitrate: 192  # kbps
+    # Optional filtergraph: read the input from the special `[ain]` pad and write the final result to `[aout]`
+    # filtergraph:
+    # - "[ain]asetrate=48000*1.5,aresample=48000[aout]" # Pitch the audio up by ~50 % (makes everyone sound like that famous mouse!)
   # If cache is enabled
   cache: true
   # If dir is empty, cache will be stored in the same directory as media source
@@ -127,6 +157,57 @@ hls-proxy:
   my_server: http://192.168.1.34:9981
 ```
 
+## Defining filter graphs on video and audio streams
+
+You can optionally define filtergraphs on video and audio profiles. This
+allows you to modify the streams during the transcoding process.
+
+If you don't specify any filtergraphs, you get the video scaled to the
+dimensions you specified and the first audio track from the input video.
+
+When you do supply a filtergraph:
+
+* start the chain at the source pads `[vin]` (video) or `[ain]` (audio)  
+* end the chain at `[vout]` or `[aout]` – these pads are what `-map` picks up
+
+Examples:
+
+```yaml
+vod:
+  video-profiles:
+    1080p:
+      width: 1920
+      height: 1080
+      bitrate: 5000
+      filtergraph:
+        - "[vin]format=pix_fmts=yuv420p[vout]"   # change pixel format to yuv420p
+```
+
+```yaml
+vod:
+  audio-profile:
+    filtergraph:
+      - "[ain][0:a:1]amix=inputs=2[aout]"   # mix second audio track into the first
+```
+
+### Implementation
+
+The transcoder always assembles a single FFmpeg `-filter_complex` that already contains **one video and one audio chain**:
+
+1. `[0:v]scale=…[vin]` – scales the first video stream and stores the result in pad `[vin]`.
+2. `[0:a]anull[ain]` – passes the first audio stream through unchanged into pad `[ain]`.
+3. For each stream *without* a user-supplied filtergraph, the code adds a pass-through chain (`[vin]null[vout]` for video, `[ain]anull[aout]` for audio) so that the output pads always exist.
+
+Both pads are then selected with:
+
+```sh
+-map [vout] -map [aout]?
+```
+
+`-map` tells FFmpeg exactly which streams (by pad name or by input index) should
+be written to the current output file. Being explicit prevents surprises when
+inputs carry multiple audio/video streams.
+
 ## Transcoding profiles for live streams
 
 go-transcode supports any formats that ffmpeg likes. We provide profiles out-of-the-box for h264+aac (mp4 container) for 360p, 540p, 720p and 1080p resolutions: `h264_360p`, `h264_540p`, `h264_720p` and `h264_1080p`. Profiles can have any name, but must match regex: `^[0-9A-Za-z_-]+$`

diff --git a/hlsvod/manager.go b/hlsvod/manager.go
@@ -53,15 +53,28 @@ type ManagerCtx struct {
 }
 
 func New(config Config) *ManagerCtx {
+	// apply defaults if zero
+	if config.SegmentLength == 0 {
+		config.SegmentLength = 4
+	}
+	if config.SegmentOffset == 0 {
+		config.SegmentOffset = 1
+	}
+	if config.SegmentBufferMin == 0 {
+		config.SegmentBufferMin = 3
+	}
+	if config.SegmentBufferMax == 0 {
+		config.SegmentBufferMax = 5
+	}
 	ctx, cancel := context.WithCancel(context.Background())
 	return &ManagerCtx{
 		logger: log.With().Str("module", "hlsvod").Str("submodule", "manager").Logger(),
 		config: config,
 
-		segmentLength:    4,
-		segmentOffset:    1,
-		segmentBufferMin: 3,
-		segmentBufferMax: 5,
+		segmentLength:    config.SegmentLength,
+		segmentOffset:    config.SegmentOffset,
+		segmentBufferMin: config.SegmentBufferMin,
+		segmentBufferMax: config.SegmentBufferMax,
 
 		ctx:    ctx,
 		cancel: cancel,

diff --git a/hlsvod/transcode.go b/hlsvod/transcode.go
@@ -26,10 +26,20 @@ type VideoProfile struct {
 	Width   int
 	Height  int
 	Bitrate int // in kilobytes
+
+	// Optional FFmpeg overrides
+	Encoder     string
+	Preset      string
+	Profile     string
+	Level       string
+	ExtraArgs   []string
+	FilterGraph []string
 }
 
 type AudioProfile struct {
-	Bitrate int // in kilobytes
+	Encoder     string   // audio encoder (e.g., "aac", "copy", "libopus")
+	Bitrate     int      // in kilobits per second (0 means use encoder default)
+	FilterGraph []string // optional audio filtergraph chains
 }
 
 // returns a channel, that delivers name of the segments as they are encoded
@@ -78,35 +88,95 @@ func TranscodeSegments(ctx context.Context, ffmpegBinary string, config Transcod
 		"-sn", // No subtitles
 	}...)
 
-	// Video specs
+	// Filtergraph (scaling + optional user graph)
 	if config.VideoProfile != nil {
 		profile := config.VideoProfile
 
-		var scale string
+		// Build scale expression producing [vin] source pad
+		var scaleExpr string
 		if profile.Width >= profile.Height {
-			scale = fmt.Sprintf("scale=-2:%d", profile.Height)
+			scaleExpr = fmt.Sprintf("[0:v]scale=-2:%d[vin]", profile.Height)
 		} else {
-			scale = fmt.Sprintf("scale=%d:-2", profile.Width)
+			scaleExpr = fmt.Sprintf("[0:v]scale=%d:-2[vin]", profile.Width)
+		}
+
+		// Source audio pad
+		audioIn := "[0:a]anull[ain]"
+
+		graphParts := []string{scaleExpr, audioIn}
+
+		// Video filters
+		if len(profile.FilterGraph) > 0 {
+			graphParts = append(graphParts, profile.FilterGraph...)
+		} else {
+			graphParts = append(graphParts, "[vin]null[vout]")
+		}
+		// Audio filters
+		if config.AudioProfile != nil && len(config.AudioProfile.FilterGraph) > 0 {
+			graphParts = append(graphParts, config.AudioProfile.FilterGraph...)
+		} else {
+			graphParts = append(graphParts, "[ain]anull[aout]")
+		}
+		combinedFG := strings.Join(graphParts, ";")
+		// Add filter graph and explicit stream mapping (video & audio)
+		args = append(args, "-filter_complex", combinedFG)
+		args = append(args, "-map", "[vout]", "-map", "[aout]?")
+	}
+
+	// Video specs
+	if config.VideoProfile != nil {
+		profile := config.VideoProfile
+
+		// apply defaults if empty
+		encoder := profile.Encoder
+		if encoder == "" {
+			encoder = "libx264"
+		}
+		preset := profile.Preset
+		if preset == "" {
+			preset = "faster"
+		}
+		prof := profile.Profile
+		if prof == "" {
+			prof = "high"
+		}
+		lvl := profile.Level
+		if lvl == "" {
+			lvl = "4.0"
 		}
 
 		args = append(args, []string{
-			"-vf", scale,
-			"-c:v", "libx264",
-			"-preset", "faster",
-			"-profile:v", "high",
-			"-level:v", "4.0",
+			"-c:v", encoder,
+			"-preset", preset,
+			"-profile:v", prof,
+			"-level:v", lvl,
 			"-b:v", fmt.Sprintf("%dk", profile.Bitrate),
 		}...)
+
+		// extra args
+		if len(profile.ExtraArgs) > 0 {
+			extraArgs := make([]string, 0, len(profile.ExtraArgs))
+			for _, arg := range profile.ExtraArgs {
+				// Split combined args like "-tune:v=ull" into "-tune:v", "ull"
+				if strings.Contains(arg, "=") {
+					extraArgs = append(extraArgs, strings.SplitN(arg, "=", 2)...)
+				} else {
+					extraArgs = append(extraArgs, arg)
+				}
+			}
+			args = append(args, extraArgs...)
+		}
 	}
 
 	// Audio specs
 	if config.AudioProfile != nil {
 		profile := config.AudioProfile
-
-		args = append(args, []string{
-			"-c:a", "aac",
-			"-b:a", fmt.Sprintf("%dk", profile.Bitrate),
-		}...)
+		if profile.Encoder != "" {
+			args = append(args, "-c:a", profile.Encoder)
+			if profile.Bitrate > 0 {
+				args = append(args, "-b:a", fmt.Sprintf("%dk", profile.Bitrate))
+			}
+		}
 	}
 
 	// Segmenting specs

diff --git a/hlsvod/types.go b/hlsvod/types.go
@@ -14,6 +14,12 @@ type Config struct {
 	VideoKeyframes bool
 	AudioProfile   *AudioProfile
 
+	// HLS-VOD segment parameters (override defaults from server)
+	SegmentLength    float64
+	SegmentOffset    float64
+	SegmentBufferMin int
+	SegmentBufferMax int
+
 	Cache    bool
 	CacheDir string // If not empty, cache will folder will be used instead of media path
 

diff --git a/internal/api/hlsvod.go b/internal/api/hlsvod.go
@@ -144,15 +144,27 @@ func (a *ApiManagerCtx) HlsVod(r chi.Router) {
 				SegmentPrefix: profileID,
 
 				VideoProfile: &hlsvod.VideoProfile{
-					Width:   profile.Width,
-					Height:  profile.Height,
-					Bitrate: profile.Bitrate,
+					Width:       profile.Width,
+					Height:      profile.Height,
+					Bitrate:     profile.Bitrate,
+					Encoder:     profile.Encoder,
+					Preset:      profile.Preset,
+					Profile:     profile.Profile,
+					Level:       profile.Level,
+					FilterGraph: profile.FilterGraph,
+					ExtraArgs:   profile.ExtraArgs,
 				},
 				VideoKeyframes: a.config.Vod.VideoKeyframes,
 				AudioProfile: &hlsvod.AudioProfile{
 					Bitrate: a.config.Vod.AudioProfile.Bitrate,
+					FilterGraph: a.config.Vod.AudioProfile.FilterGraph,
 				},
 
+				SegmentLength:    a.config.Vod.SegmentLength,
+				SegmentOffset:    a.config.Vod.SegmentOffset,
+				SegmentBufferMin: a.config.Vod.SegmentBufferMin,
+				SegmentBufferMax: a.config.Vod.SegmentBufferMax,
+
 				Cache:    a.config.Vod.Cache,
 				CacheDir: a.config.Vod.CacheDir,