Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 83 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,43 @@ vod:
width: 1920
height: 1080
bitrate: 5000
1080p_nvidia_gpu:
width: 1920
height: 1080
bitrate: 5000
# Optional ffmpeg video overrides
encoder: h264_nvenc # default "libx264"
preset: p1 # default "faster"
profile: high # default "high"
level: auto # default "4.0"
extra-args: # optionally, additional ffmpeg video encoder arguments
- "-tune:v=ull" # can be passed either as a combined "name=value" arg, which will be split
- "-rc:v" # or as the parameter ...
- "cbr" # ... and its value on separate lines
# Optional filtergraph, start each chain with the special `[vin]` pad and end them in `[vout]`
filtergraph:
- "[vin]split=2[v1][v2]" # duplicate input
- "[v1]crop=iw/2:ih:0:0,hflip[left]" # left half mirrored horizontally
- "[v2]crop=iw/2:ih:iw/2:0,vflip[right]" # right half flipped vertically
- "[left][right]hstack[vout]" # join halves back together

# HLS-VOD segment behaviour (optional)
segment-length: 4 # nominal segment length in seconds
segment-offset: 1 # allowed +/- tolerance in seconds
segment-buffer-min: 3 # min segments ahead of playhead
segment-buffer-max: 5 # max segments transcoded at once

# Use existing video keyframes as reference points for splitting chunks.
# Enabling this might cause long probing times in order to get
# all keyframes - therefore they should be cached
# all keyframes - therefore they should be cached
video-keyframes: false
# Single audio profile used
audio-profile:
bitrate: 192 # kbps
encoder: aac # default "aac", but "copy" is an alternative
bitrate: 192 # kbps
# Optional filtergraph, start each chain with the special `[ain]` pad and end them in `[aout]`
# filtergraph:
# - "[ain]asetrate=48000*1.5,aresample=48000[aout]" # Pitch the audio up by ~50 % (makes everyone sound like that famous mouse!)
# If cache is enabled
cache: true
# If dir is empty, cache will be stored in the same directory as media source
Expand All @@ -127,6 +157,57 @@ hls-proxy:
my_server: http://192.168.1.34:9981
```

## Defining filter graphs on video and audio streams

You can optionally define filtergraphs on video and audio profiles. This
allows you to modify the streams during the transcoding process.

If you don't specify any filtergraphs, you get the video scaled to the
dimensions you specified and the first audio track from the input video.

When you do supply a filtergraph:

* start the chain at the source pads `[vin]` (video) or `[ain]` (audio)
* end the chain at `[vout]` or `[aout]` – these pads are what `-map` picks up

Examples:

```yaml
vod:
video-profiles:
1080p:
width: 1920
height: 1080
bitrate: 5000
filtergraph:
- "[vin]format=pix_fmts=yuv420p[vout]" # change pixel format to yuv420p
```

```yaml
vod:
audio-profile:
filtergraph:
- "[ain][0:a:1]amix=inputs=2[aout]" # mix second audio track into the first
```

### Implementation

The transcoder always assembles a single FFmpeg `-filter_complex` that already contains **one video and one audio chain**:

1. `[0:v]scale=…[vin]` – scales the first video stream and stores the result in pad `[vin]`.
2. `[0:a]anull[ain]` – passes the first audio stream through unchanged into pad `[ain]`.
3. If *no* extra filtergraph is supplied the code auto-adds `[vin]null[vout] ; [ain]anull[aout]` so the outputs exist.

Both pads are then selected with:

```sh
-map [vout] -map [aout]?
```

`-map` tells FFmpeg exactly which streams (by pad name or by input index) should
be written to the current output file. Being explicit prevents surprises when
inputs carry multiple audio/video streams.

## Transcoding profiles for live streams

go-transcode supports any format that ffmpeg supports. We provide profiles out-of-the-box for h264+aac (mp4 container) for 360p, 540p, 720p and 1080p resolutions: `h264_360p`, `h264_540p`, `h264_720p` and `h264_1080p`. Profiles can have any name, but the name must match the regex: `^[0-9A-Za-z_-]+$`
Expand Down
21 changes: 17 additions & 4 deletions hlsvod/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,28 @@ type ManagerCtx struct {
}

func New(config Config) *ManagerCtx {
// apply defaults if zero
if config.SegmentLength == 0 {
config.SegmentLength = 4
}
if config.SegmentOffset == 0 {
config.SegmentOffset = 1
}
if config.SegmentBufferMin == 0 {
config.SegmentBufferMin = 3
}
if config.SegmentBufferMax == 0 {
config.SegmentBufferMax = 5
}
ctx, cancel := context.WithCancel(context.Background())
return &ManagerCtx{
logger: log.With().Str("module", "hlsvod").Str("submodule", "manager").Logger(),
config: config,

segmentLength: 4,
segmentOffset: 1,
segmentBufferMin: 3,
segmentBufferMax: 5,
segmentLength: config.SegmentLength,
segmentOffset: config.SegmentOffset,
segmentBufferMin: config.SegmentBufferMin,
segmentBufferMax: config.SegmentBufferMax,

ctx: ctx,
cancel: cancel,
Expand Down
100 changes: 85 additions & 15 deletions hlsvod/transcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,20 @@ type VideoProfile struct {
Width int
Height int
Bitrate int // in kilobits per second (passed to ffmpeg as "-b:v <n>k")

// Optional FFmpeg overrides
Encoder string
Preset string
Profile string
Level string
ExtraArgs []string
FilterGraph []string
}

type AudioProfile struct {
Bitrate int // in kilobytes
Encoder string // audio encoder (e.g., "aac", "copy", "libopus")
Bitrate int // in kilobits per second (0 means use encoder default)
FilterGraph []string // optional audio filtergraph chains
}

// returns a channel, that delivers name of the segments as they are encoded
Expand Down Expand Up @@ -78,35 +88,95 @@ func TranscodeSegments(ctx context.Context, ffmpegBinary string, config Transcod
"-sn", // No subtitles
}...)

// Video specs
// Filtergraph (scaling + optional user graph)
if config.VideoProfile != nil {
profile := config.VideoProfile

var scale string
// Build scale expression producing [vin] source pad
var scaleExpr string
if profile.Width >= profile.Height {
scale = fmt.Sprintf("scale=-2:%d", profile.Height)
scaleExpr = fmt.Sprintf("[0:v]scale=-2:%d[vin]", profile.Height)
} else {
scale = fmt.Sprintf("scale=%d:-2", profile.Width)
scaleExpr = fmt.Sprintf("[0:v]scale=%d:-2[vin]", profile.Width)
}

// Source audio pad
audioIn := "[0:a]anull[ain]"

graphParts := []string{scaleExpr, audioIn}

// Video filters
if len(profile.FilterGraph) > 0 {
graphParts = append(graphParts, profile.FilterGraph...)
} else {
graphParts = append(graphParts, "[vin]null[vout]")
}
// Audio filters
if config.AudioProfile != nil && len(config.AudioProfile.FilterGraph) > 0 {
graphParts = append(graphParts, config.AudioProfile.FilterGraph...)
} else {
graphParts = append(graphParts, "[ain]anull[aout]")
}
combinedFG := strings.Join(graphParts, ";")
// Add filter graph and explicit stream mapping (video & audio)
args = append(args, "-filter_complex", combinedFG)
args = append(args, "-map", "[vout]", "-map", "[aout]?")
}

// Video specs
if config.VideoProfile != nil {
profile := config.VideoProfile

// apply defaults if empty
encoder := profile.Encoder
if encoder == "" {
encoder = "libx264"
}
preset := profile.Preset
if preset == "" {
preset = "faster"
}
prof := profile.Profile
if prof == "" {
prof = "high"
}
lvl := profile.Level
if lvl == "" {
lvl = "4.0"
}

args = append(args, []string{
"-vf", scale,
"-c:v", "libx264",
"-preset", "faster",
"-profile:v", "high",
"-level:v", "4.0",
"-c:v", encoder,
"-preset", preset,
"-profile:v", prof,
"-level:v", lvl,
"-b:v", fmt.Sprintf("%dk", profile.Bitrate),
}...)

// extra args
if len(profile.ExtraArgs) > 0 {
extraArgs := make([]string, 0, len(profile.ExtraArgs))
for _, arg := range profile.ExtraArgs {
// Split combined args like "-tune:v=ull" into "-tune:v", "ull"
if strings.Contains(arg, "=") {
extraArgs = append(extraArgs, strings.SplitN(arg, "=", 2)...)
} else {
extraArgs = append(extraArgs, arg)
}
}
args = append(args, extraArgs...)
}
}

// Audio specs
if config.AudioProfile != nil {
profile := config.AudioProfile

args = append(args, []string{
"-c:a", "aac",
"-b:a", fmt.Sprintf("%dk", profile.Bitrate),
}...)
if profile.Encoder != "" {
args = append(args, "-c:a", profile.Encoder)
if profile.Bitrate > 0 {
args = append(args, "-b:a", fmt.Sprintf("%dk", profile.Bitrate))
}
}
}

// Segmenting specs
Expand Down
6 changes: 6 additions & 0 deletions hlsvod/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ type Config struct {
VideoKeyframes bool
AudioProfile *AudioProfile

// HLS-VOD segment parameters (override defaults from server)
SegmentLength float64
SegmentOffset float64
SegmentBufferMin int
SegmentBufferMax int

Cache bool
CacheDir string // If not empty, this folder will be used for the cache instead of the media path

Expand Down
18 changes: 15 additions & 3 deletions internal/api/hlsvod.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,27 @@ func (a *ApiManagerCtx) HlsVod(r chi.Router) {
SegmentPrefix: profileID,

VideoProfile: &hlsvod.VideoProfile{
Width: profile.Width,
Height: profile.Height,
Bitrate: profile.Bitrate,
Width: profile.Width,
Height: profile.Height,
Bitrate: profile.Bitrate,
Encoder: profile.Encoder,
Preset: profile.Preset,
Profile: profile.Profile,
Level: profile.Level,
FilterGraph: profile.FilterGraph,
ExtraArgs: profile.ExtraArgs,
},
VideoKeyframes: a.config.Vod.VideoKeyframes,
AudioProfile: &hlsvod.AudioProfile{
Bitrate: a.config.Vod.AudioProfile.Bitrate,
FilterGraph: a.config.Vod.AudioProfile.FilterGraph,
},

SegmentLength: a.config.Vod.SegmentLength,
SegmentOffset: a.config.Vod.SegmentOffset,
SegmentBufferMin: a.config.Vod.SegmentBufferMin,
SegmentBufferMax: a.config.Vod.SegmentBufferMax,

Cache: a.config.Vod.Cache,
CacheDir: a.config.Vod.CacheDir,

Expand Down
Loading