Skip to content
49 changes: 48 additions & 1 deletion frontend/src/components/VideoOutput.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { useEffect, useRef, useState, useCallback } from "react";
import { Volume2, VolumeX } from "lucide-react";
import { Card, CardContent, CardHeader, CardTitle } from "./ui/card";
import { Spinner } from "./ui/spinner";
import { PlayOverlay } from "./ui/play-overlay";
Expand Down Expand Up @@ -49,15 +50,47 @@ export function VideoOutput({
const [isFadingOut, setIsFadingOut] = useState(false);
const overlayTimeoutRef = useRef<number | null>(null);

// Audio state: start muted to comply with browser autoplay policy.
// User can click the speaker icon to unmute once the stream is playing.
const [isMuted, setIsMuted] = useState(true);
const [hasAudioTrack, setHasAudioTrack] = useState(false);

// Use external ref if provided, otherwise use internal
const containerRef = videoContainerRef || internalContainerRef;

useEffect(() => {
if (videoRef.current && remoteStream) {
videoRef.current.srcObject = remoteStream;

// Check if the stream contains an audio track
const audioTracks = remoteStream.getAudioTracks();
setHasAudioTrack(audioTracks.length > 0);

// Listen for tracks being added later (audio may arrive after video)
const handleTrackAdded = () => {
const tracks = remoteStream.getAudioTracks();
setHasAudioTrack(tracks.length > 0);
};
remoteStream.addEventListener("addtrack", handleTrackAdded);

return () => {
remoteStream.removeEventListener("addtrack", handleTrackAdded);
};
}
}, [remoteStream]);

// Sync muted state to the video element
useEffect(() => {
if (videoRef.current) {
videoRef.current.muted = isMuted;
}
}, [isMuted]);

const toggleMute = useCallback((e: React.MouseEvent) => {
e.stopPropagation(); // Don't trigger play/pause or pointer lock
setIsMuted(prev => !prev);
}, []);

// Listen for video playing event to notify parent
useEffect(() => {
const video = videoRef.current;
Expand Down Expand Up @@ -174,9 +207,23 @@ export function VideoOutput({
: "max-w-full max-h-full object-contain"
}
autoPlay
muted
muted={isMuted}
playsInline
/>
{/* Audio mute/unmute toggle - only shown when stream has audio */}
{hasAudioTrack && (
<button
onClick={toggleMute}
className="absolute bottom-4 right-4 p-2 rounded-lg bg-black/60 hover:bg-black/80 text-white transition-colors z-10"
title={isMuted ? "Unmute audio" : "Mute audio"}
>
{isMuted ? (
<VolumeX className="w-5 h-5" />
) : (
<Volume2 className="w-5 h-5" />
)}
</button>
)}
{/* Play/Pause Overlay */}
{showOverlay && (
<div className="absolute inset-0 flex items-center justify-center pointer-events-none">
Expand Down
20 changes: 16 additions & 4 deletions frontend/src/hooks/useUnifiedWebRTC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@
transceiver = pc.addTransceiver("video");
}

// Add a receive-only audio transceiver so the SDP offer includes an
// audio m-line. The backend will attach its audio track to this
// transceiver after processing the offer.
pc.addTransceiver("audio", { direction: "recvonly" });

// Force VP8-only for aiortc compatibility
if (transceiver) {
const codecs = RTCRtpReceiver.getCapabilities("video")?.codecs || [];
Expand All @@ -221,11 +226,18 @@
}

// Event handlers
// Collect all incoming tracks (video + audio) into a single MediaStream.
// The backend sends video and audio as separate tracks; we merge them
// into one MediaStream for the <video> element.
const combinedStream = new MediaStream();
pc.ontrack = (evt: RTCTrackEvent) => {
if (evt.streams && evt.streams[0]) {
console.log("[UnifiedWebRTC] Setting remote stream");
setRemoteStream(evt.streams[0]);
}
console.log(
`[UnifiedWebRTC] Track received: ${evt.track.kind} (id: ${evt.track.id})`
);
combinedStream.addTrack(evt.track);
// Create a new MediaStream wrapper so React detects the state change
// (same object reference would not trigger a re-render)
setRemoteStream(new MediaStream(combinedStream.getTracks()));
};

pc.onconnectionstatechange = () => {
Expand Down Expand Up @@ -410,7 +422,7 @@
setIsConnecting(false);
}
},
[isConnecting, options, fetchIceServers, sendOffer, sendIceCandidate]

Check warning on line 425 in frontend/src/hooks/useUnifiedWebRTC.ts

View workflow job for this annotation

GitHub Actions / Frontend Linting (ESLint + Prettier)

React Hook useCallback has a missing dependency: 'isCloudMode'. Either include it or remove the dependency array
);

const updateVideoTrack = useCallback(
Expand Down
6 changes: 6 additions & 0 deletions src/scope/core/ndi/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,9 @@ def setup_send_functions(lib: ctypes.CDLL) -> None:
ctypes.c_void_p,
ctypes.POINTER(NDIlib_video_frame_v2_t),
]

lib.NDIlib_send_send_audio_v2.restype = None
lib.NDIlib_send_send_audio_v2.argtypes = [
ctypes.c_void_p,
ctypes.POINTER(NDIlib_audio_frame_v2_t),
]
58 changes: 57 additions & 1 deletion src/scope/core/outputs/ndi.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""NDI output sink implementation.

Sends processed video frames over the network via NDI.
Sends processed video frames and audio over the network via NDI.
Uses the shared NDI ctypes bindings from scope.core.ndi.
"""

Expand All @@ -13,6 +13,7 @@

from scope.core.ndi import (
NDI_FOURCC_RGBA,
NDIlib_audio_frame_v2_t,
NDIlib_send_create_t,
NDIlib_video_frame_v2_t,
load_library,
Expand Down Expand Up @@ -162,6 +163,61 @@ def send_frame(self, frame: np.ndarray | torch.Tensor) -> bool:
logger.error(f"Error sending NDI frame: {e}")
return False

def send_audio(
    self,
    audio: np.ndarray | torch.Tensor,
    sample_rate: int,
    num_channels: int,
) -> bool:
    """Send audio samples over NDI.

    Args:
        audio: Float32 audio samples. Shape (S,) for mono or (C, S) for
            multi-channel. Values should be in [-1.0, 1.0] range.
        sample_rate: Audio sample rate (e.g. 48000).
        num_channels: Number of audio channels (e.g. 1 for mono).

    Returns:
        True if the send call was issued, False otherwise (including when
        the NDI sender has not been created yet).
    """
    # Best-effort sink: quietly refuse when the sender is not initialized.
    if self._send_instance is None or self._lib is None:
        return False

    try:
        if isinstance(audio, torch.Tensor):
            # detach() first: Tensor.numpy() raises RuntimeError on
            # grad-tracking tensors, which would turn every send into a
            # logged failure.
            audio = audio.detach()
            if audio.is_cuda:
                audio = audio.cpu()
            audio = audio.numpy()

        audio = np.asarray(audio, dtype=np.float32)

        # NDI reads raw memory through p_data, so the buffer must be
        # C-contiguous.
        if not audio.flags["C_CONTIGUOUS"]:
            audio = np.ascontiguousarray(audio)

        # NDIlib_audio_frame_v2_t carries PLANAR float32 audio: one
        # contiguous run of samples per channel, with channels separated by
        # channel_stride_in_bytes. A C-contiguous (C, S) array already has
        # exactly that layout; a 1-D (S,) array is a single mono plane.
        num_samples = audio.shape[-1] if audio.ndim > 1 else len(audio)

        audio_frame = NDIlib_audio_frame_v2_t()
        audio_frame.sample_rate = sample_rate
        audio_frame.no_channels = num_channels
        audio_frame.no_samples = num_samples
        # NOTE(review): the NDI SDK's "synthesize timecode" sentinel is
        # NDIlib_send_timecode_synthesize (INT64_MAX), not -1 — confirm
        # against the video send path before relying on auto timecodes.
        audio_frame.timecode = -1  # auto
        audio_frame.p_data = audio.ctypes.data
        audio_frame.channel_stride_in_bytes = num_samples * 4  # float32 = 4 bytes
        audio_frame.p_metadata = None
        audio_frame.timestamp = -1  # auto

        # Synchronous send; NDI consumes the buffer before returning, so
        # `audio` only needs to stay alive for the duration of this call.
        self._lib.NDIlib_send_send_audio_v2(
            self._send_instance, ctypes.byref(audio_frame)
        )
        return True

    except Exception as e:
        logger.error(f"Error sending NDI audio: {e}")
        return False

def resize(self, width: int, height: int):
"""Update output dimensions (NDI rebuilds frame struct per-send)."""
self._width = width
Expand Down
Loading
Loading