From b7b56021100f5f7e8edc4c1684d3924432b1727d Mon Sep 17 00:00:00 2001 From: jck411 <81551487+jck411@users.noreply.github.com> Date: Mon, 19 Jan 2026 19:17:32 -0500 Subject: [PATCH] Trim voice state handling --- frontend-voice/src/App.jsx | 202 ++++---------------- frontend-voice/src/hooks/useAudioCapture.js | 60 +----- 2 files changed, 43 insertions(+), 219 deletions(-) diff --git a/frontend-voice/src/App.jsx b/frontend-voice/src/App.jsx index b958475..fd03e3f 100644 --- a/frontend-voice/src/App.jsx +++ b/frontend-voice/src/App.jsx @@ -19,11 +19,10 @@ const DEFAULT_TTS_SYNC_CPS = 15; const TTS_SYNC_CPS_MIN = 8; const TTS_SYNC_CPS_MAX = 30; -const deriveAppState = (mode, backend, responseActive = false) => { +const deriveAppState = (sessionActive, backend, responseActive = false) => { + if (!sessionActive) return 'IDLE'; if (responseActive) return 'SPEAKING'; - if (backend === 'PROCESSING' || backend === 'SPEAKING') return backend; - if (mode === 'PAUSED') return 'PAUSED'; - if (mode === 'FRESH') return 'FRESH'; + if (backend === 'PROCESSING' || backend === 'SPEAKING') return 'SPEAKING'; return 'LISTENING'; }; @@ -71,13 +70,11 @@ function App() { } }); - // UI modes: FRESH (never started), ACTIVE (listening/processing/speaking), PAUSED (user paused) - const [uiMode, setUiModeState] = useState('FRESH'); - const uiModeRef = useRef('FRESH'); - // Backend states: IDLE, LISTENING, PROCESSING, SPEAKING const [backendState, setBackendStateState] = useState('IDLE'); const backendStateRef = useRef('IDLE'); + const [sessionActive, setSessionActiveState] = useState(false); + const sessionActiveRef = useRef(false); const responseRef = useRef(''); const displayedResponseRef = useRef(''); @@ -90,7 +87,6 @@ function App() { const streamCarryRef = useRef(0); const fadeTimeoutRef = useRef(null); const pendingFadeRef = useRef(false); - const hasAutoStartedRef = useRef(false); const audioContextRef = useRef(null); const nextPlayTimeRef = useRef(0); const ttsSampleRateRef = useRef(DEFAULT_TTS_SAMPLE_RATE); @@ -104,7 +100,6 @@ function App() { const autoScrollRef = useRef(true); // Inactivity timeout refs - const pauseTimeoutRef = useRef(null); const listenTimeoutRef = useRef(null); const sttDraftRef = useRef(sttDraft); // Keep ref in sync for use in callbacks const streamSpeedRef = useRef(streamSpeedCps); @@ -130,21 +125,19 @@ function App() { initMic, releaseMic, startNewConversation, - resumeListening, - pauseListening, handleSessionReady, } = useAudioCapture(sendMessage, readyState, VOICE_CONFIG.audio); - const setUiMode = (nextMode) => { - uiModeRef.current = nextMode; - setUiModeState(nextMode); - }; - const setBackendState = (nextState) => { backendStateRef.current = nextState; setBackendStateState(nextState); }; + const setSessionActive = (nextState) => { + sessionActiveRef.current = nextState; + setSessionActiveState(nextState); + }; + const setResponseActive = useCallback((nextActive) => { responseActiveRef.current = nextActive; setIsResponseActiveState(nextActive); @@ -345,34 +338,10 @@ function App() { } }, [getAudioContext, sendMessage]); - const appState = deriveAppState(uiMode, backendState, isResponseActive); - - // Auto-start on first connect - run only ONCE - useEffect(() => { - if (document.visibilityState === 'hidden') return; - if (readyState === 1 && !hasAutoStartedRef.current) { - hasAutoStartedRef.current = true; - console.log('🎤 Auto-starting (one-time)...'); - initMic().then(ok => { - if (ok) { - setUiMode('ACTIVE'); - setBackendState('IDLE'); - setTextVisible(true); - startNewConversation(); - } - }); - } - // Intentionally minimal deps - this should only run once - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [readyState]); + const appState = deriveAppState(sessionActive, backendState, isResponseActive); const scheduleFade = useCallback(() => { if (fadeTimeoutRef.current) clearTimeout(fadeTimeoutRef.current); - // Don't fade if paused - user may want to read the text - if (uiModeRef.current === 'PAUSED') { - pendingFadeRef.current = false; - return; - } const isStreaming = responseRef.current && displayedResponseRef.current.length < responseRef.current.length; if (isStreaming) { @@ -425,7 +394,7 @@ function App() { responseCompleteRef.current = false; setResponseActive(false); setDisplayedResponse(''); - // Always schedule the fade after streaming completes (3 seconds, unless paused) + // Always schedule the fade after streaming completes. pendingFadeRef.current = false; scheduleFade(); }, [pushMessage, scheduleFade, setDisplayedResponse, setLatestExchange, setResponseActive]); @@ -571,10 +540,6 @@ function App() { }, []); const clearInactivityTimeouts = useCallback(() => { - if (pauseTimeoutRef.current) { - clearTimeout(pauseTimeoutRef.current); - pauseTimeoutRef.current = null; - } if (listenTimeoutRef.current) { clearTimeout(listenTimeoutRef.current); listenTimeoutRef.current = null; @@ -582,7 +547,7 @@ function App() { clearTimeoutCountdown(); }, [clearTimeoutCountdown]); - const startTimeoutCountdown = useCallback((seconds, mode) => { + const startTimeoutCountdown = useCallback((seconds) => { clearTimeoutCountdown(); const totalSeconds = Number(seconds); if (!Number.isFinite(totalSeconds) || totalSeconds <= 0) return; @@ -594,13 +559,13 @@ function App() { countdownTimeoutRef.current = setTimeout(() => { let remaining = startValue; - setTimeoutCountdown({ remaining, mode }); + setTimeoutCountdown(remaining); countdownIntervalRef.current = setInterval(() => { remaining -= 1; if (remaining <= 0) { clearTimeoutCountdown(); } else { - setTimeoutCountdown({ remaining, mode }); + setTimeoutCountdown(remaining); } }, 1000); }, startDelayMs); @@ -631,7 +596,7 @@ function App() { }, [clearInactivityTimeouts, finalizePartialResponse, readyState, sendMessage, stopTtsPlayback]); const resetSession = useCallback((options = {}) => { - const { clearMessages = false, fadeText = false, resetAutoStart = true } = options; + const { clearMessages = false, fadeText = false } = options; clearInactivityTimeouts(); cancelFade(); @@ -641,11 +606,7 @@ function App() { stopTtsPlayback(); releaseMic(); - if (resetAutoStart) { - hasAutoStartedRef.current = false; - } - - setUiMode('FRESH'); + setSessionActive(false); setBackendState('IDLE'); setCurrentTranscript(''); setDisplayedResponse(''); @@ -687,6 +648,7 @@ function App() { // Schedule listen timeout (when listening with no speech) const scheduleListenTimeout = useCallback((options = {}) => { + if (!sessionActiveRef.current) return; if (responseActiveRef.current) return; clearInactivityTimeouts(); const seconds = sttDraftRef.current.listen_timeout_seconds; @@ -696,32 +658,17 @@ function App() { if (shouldLog) { console.log(`⏱️ Starting listen timeout: ${seconds}s`); } - startTimeoutCountdown(seconds, 'listen'); + startTimeoutCountdown(seconds); listenTimeoutRef.current = setTimeout(() => { console.log('⏰ Listen timeout expired'); handleInactivityTimeout(); }, seconds * 1000); }, [clearInactivityTimeouts, handleInactivityTimeout, startTimeoutCountdown]); - // Schedule pause timeout (when paused) - const schedulePauseTimeout = useCallback(() => { - clearInactivityTimeouts(); - const seconds = sttDraftRef.current.pause_timeout_seconds; - if (seconds <= 0) return; // Disabled - - console.log(`⏱️ Starting pause timeout: ${seconds}s`); - startTimeoutCountdown(seconds, 'pause'); - pauseTimeoutRef.current = setTimeout(() => { - console.log('⏰ Pause timeout expired'); - handleInactivityTimeout(); - }, seconds * 1000); - }, [clearInactivityTimeouts, handleInactivityTimeout, startTimeoutCountdown]); - useEffect(() => { const wasActive = responseActivePrevRef.current; responseActivePrevRef.current = isResponseActive; if (!wasActive || isResponseActive) return; - if (uiModeRef.current !== 'ACTIVE') return; const backend = backendStateRef.current; if (backend === 'LISTENING' || backend === 'IDLE') { scheduleListenTimeout(); @@ -780,41 +727,24 @@ function App() { if (msg.type === 'state') { const s = msg.state; const previousBackendState = backendStateRef.current; - const currentAppState = deriveAppState( - uiModeRef.current, - backendStateRef.current, - responseActiveRef.current, - ); - console.log('Backend state:', s, '| appState:', currentAppState); setBackendState(s); if (s === 'LISTENING') { - // Only update UI if not paused - if (uiModeRef.current !== 'PAUSED') { - if (uiModeRef.current === 'FRESH') { - setUiMode('ACTIVE'); - } - if (previousBackendState !== 'LISTENING') { - clearTranscriptForListening(); - } - cancelFade(); - setTextVisible(true); - scheduleListenTimeout(); + setSessionActive(true); + if (previousBackendState !== 'LISTENING') { + clearTranscriptForListening(); } - // DON'T send any messages to backend - just update UI + cancelFade(); + setTextVisible(true); + scheduleListenTimeout(); } else if (s === 'PROCESSING' || s === 'SPEAKING') { // Clear timeouts while processing/speaking - don't timeout during activity clearInactivityTimeouts(); - if (uiModeRef.current !== 'ACTIVE') { - setUiMode('ACTIVE'); - } + setSessionActive(true); } else if (s === 'IDLE') { // Keep UI mode as-is; just fade the transcript after idle. - if (uiModeRef.current !== 'PAUSED') { - scheduleFade(); - } - // If ACTIVE (not paused/fresh), schedule listen timeout - if (uiModeRef.current === 'ACTIVE' && previousBackendState !== 'LISTENING') { + scheduleFade(); + if (sessionActiveRef.current && previousBackendState !== 'LISTENING') { scheduleListenTimeout(); } } @@ -822,7 +752,7 @@ function App() { if (msg.type === 'transcript') { // Reset listen timeout on any transcript (user is speaking) - if (uiModeRef.current === 'ACTIVE' && backendStateRef.current === 'LISTENING') { + if (sessionActiveRef.current && backendStateRef.current === 'LISTENING') { scheduleListenTimeout({ shouldLog: false }); } setCurrentTranscript(msg.text || ''); @@ -963,50 +893,29 @@ function App() { }; }, [showSettings]); - // Handle tap - pause/resume + // Handle tap const handleTap = () => { primeAudioContext(); if (showHistory || showSettings) return; const currentAppState = deriveAppState( - uiModeRef.current, + sessionActiveRef.current, backendStateRef.current, responseActiveRef.current, ); - if (currentAppState === 'PROCESSING' || currentAppState === 'SPEAKING') { + if (currentAppState === 'SPEAKING') { console.log('🎤 TAP: INTERRUPT'); interruptResponse(); return; } - if (currentAppState === 'LISTENING') { - console.log('🎤 TAP: PAUSE'); - clearInactivityTimeouts(); - cancelFade(); - setUiMode('PAUSED'); - pauseListening(); - // Schedule pause timeout - schedulePauseTimeout(); - } else if (currentAppState === 'PAUSED') { - console.log('🎤 TAP: RESUME'); - clearInactivityTimeouts(); - cancelFade(); - clearTranscriptForListening(); - setUiMode('ACTIVE'); - setBackendState('IDLE'); - setTextVisible(true); - initMic().then(ok => { - if (ok) { - resumeListening(); - } - }); - } else if (currentAppState === 'FRESH') { + if (currentAppState === 'IDLE') { console.log('🎤 TAP: FIRST START'); clearInactivityTimeouts(); cancelFade(); clearTranscriptForListening(); - setUiMode('ACTIVE'); setBackendState('IDLE'); + setSessionActive(true); setTextVisible(true); initMic().then(ok => { if (ok) { @@ -1048,7 +957,7 @@ function App() { backgroundedRef.current = true; setShowHistory(false); setShowSettings(false); - resetSession({ resetAutoStart: false }); + resetSession(); }, [resetSession]); useEffect(() => { @@ -1079,10 +988,6 @@ function App() { clearTimeout(fadeTimeoutRef.current); fadeTimeoutRef.current = null; } - if (pauseTimeoutRef.current) { - clearTimeout(pauseTimeoutRef.current); - pauseTimeoutRef.current = null; - } if (listenTimeoutRef.current) { clearTimeout(listenTimeoutRef.current); listenTimeoutRef.current = null; @@ -1108,7 +1013,7 @@ function App() { const defaultSettings = { eot_timeout_ms: 1000, // 1 second - natural conversation pace eot_threshold: 0.7, // balanced confidence threshold - pause_timeout_seconds: 30, // 30 seconds when paused + pause_timeout_seconds: 30, listen_timeout_seconds: 15, // 15 seconds of no speech }; @@ -1197,18 +1102,14 @@ function App() { const getOrbClass = () => { if (appState === 'LISTENING') return 'listening'; - if (appState === 'PROCESSING') return 'processing'; if (appState === 'SPEAKING') return 'speaking'; - if (appState === 'PAUSED') return 'paused'; return 'idle'; }; const getStatusText = () => { if (!isConnected) return 'Connecting...'; if (appState === 'LISTENING') return 'Listening...'; - if (appState === 'PROCESSING') return 'Thinking...'; if (appState === 'SPEAKING') return ''; - if (appState === 'PAUSED') return 'Paused'; return 'Tap to start'; }; @@ -1252,12 +1153,12 @@ function App() {
-
+
{getStatusText()}
- - {timeoutCountdown.mode === 'pause' ? 'Paused' : 'No speech'} - + No speech | Session ends in
- - {timeoutCountdown.remaining} + + {timeoutCountdown} s
@@ -1425,25 +1324,6 @@ function App() { />
-
-
Pause timeout
-
- {sttDraft.pause_timeout_seconds === 0 ? 'Disabled' : `${sttDraft.pause_timeout_seconds}s`} -
- setSttDraft(prev => ({ - ...prev, - pause_timeout_seconds: Number(e.target.value), - }))} - /> -
-
Listen timeout
diff --git a/frontend-voice/src/hooks/useAudioCapture.js b/frontend-voice/src/hooks/useAudioCapture.js index c11c1b5..aee9100 100644 --- a/frontend-voice/src/hooks/useAudioCapture.js +++ b/frontend-voice/src/hooks/useAudioCapture.js @@ -51,27 +51,17 @@ export default function useAudioCapture(sendMessage, readyState, options = {}) { const inputSampleRateRef = useRef(audioConfig.targetSampleRate); const sessionReadyRef = useRef(false); const pendingBuffersRef = useRef([]); - const pausedRef = useRef(false); const readyStateRef = useRef(readyState); useEffect(() => { readyStateRef.current = readyState; }, [readyState]); - const setProcessorPaused = useCallback((nextPaused) => { - const node = processorRef.current; - if (node && node.port && typeof node.port.postMessage === 'function') { - node.port.postMessage({ type: 'pause', value: nextPaused }); - } - }, []); - // Release microphone completely const releaseMic = useCallback(() => { console.log('🎤 releaseMic called'); sessionReadyRef.current = false; - pausedRef.current = false; pendingBuffersRef.current = []; - setProcessorPaused(false); if (processorRef.current) { if (processorRef.current.port) { @@ -88,10 +78,9 @@ export default function useAudioCapture(sendMessage, readyState, options = {}) { streamRef.current.getTracks().forEach(t => t.stop()); streamRef.current = null; } - }, [setProcessorPaused]); + }, []); const processAudioFrame = useCallback((float32) => { - if (pausedRef.current) return; if (!float32 || float32.length === 0) return; const inputSampleRate = inputSampleRateRef.current; @@ -181,7 +170,6 @@ export default function useAudioCapture(sendMessage, readyState, options = {}) { } }; processor = workletNode; - workletNode.port.postMessage({ type: 'pause', value: pausedRef.current }); } catch (err) { console.warn('🎤 AudioWorklet unavailable, falling back to ScriptProcessor:', err); } @@ -231,57 +219,15 @@ export default function useAudioCapture(sendMessage, readyState, options = {}) { } sessionReadyRef.current = false; - pausedRef.current = false; pendingBuffersRef.current = []; - setProcessorPaused(false); // This tells backend to clear history and start fresh STT session sendMessage(JSON.stringify({ type: 'wakeword_detected', confidence: 1.0 })); return true; - }, [sendMessage, setProcessorPaused]); - - // Resume listening (keeps history on backend) - const resumeListening = useCallback(() => { - console.log('🎤 resumeListening called'); - - if (readyStateRef.current !== ReadyState.OPEN) { - console.log('🎤 WebSocket not ready, skipping'); - return false; - } - - pausedRef.current = false; - sessionReadyRef.current = false; - setProcessorPaused(false); - - // Resume existing session without clearing history - sendMessage(JSON.stringify({ type: 'resume_listening' })); - return true; - }, [sendMessage, setProcessorPaused]); - - // Pause listening (keeps session alive on backend with KeepAlive) - const pauseListening = useCallback(() => { - console.log('🎤 pauseListening called'); - - if (readyStateRef.current !== ReadyState.OPEN) { - console.log('🎤 WebSocket not ready, skipping'); - return false; - } - - pausedRef.current = true; - sessionReadyRef.current = false; - pendingBuffersRef.current = []; - setProcessorPaused(true); - sendMessage(JSON.stringify({ type: 'pause_listening' })); - return true; - }, [sendMessage, setProcessorPaused]); + }, [sendMessage]); // Backend signals STT session is ready const handleSessionReady = useCallback(() => { - if (pausedRef.current) { - console.log('🎤 Session ready while paused, waiting for resume'); - return; - } - console.log('🎤 Session ready, flushing', pendingBuffersRef.current.length, 'pending buffers'); sessionReadyRef.current = true; @@ -297,8 +243,6 @@ export default function useAudioCapture(sendMessage, readyState, options = {}) { initMic, releaseMic, startNewConversation, - resumeListening, - pauseListening, handleSessionReady, }; }