From 3ef1d8e54ccadcf470074192b7233a1e8fa23448 Mon Sep 17 00:00:00 2001 From: Sergei Date: Tue, 27 Jan 2026 16:49:19 -0800 Subject: [PATCH] Allow user to interrupt Julia voice by speaking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enable STT listening during TTS playback to detect user interruption - When voice detected during Julia's speech, immediately stop TTS - Store interrupted transcript and process it after TTS stops - Remove 'speaking' status check from STT watchdog to allow parallel STT+TTS - Add pending transcript mechanism to handle race condition between TTS stop and STT final result 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- app/(tabs)/_layout.tsx | 54 +++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx index 7ccf30a..bd0fdf1 100644 --- a/app/(tabs)/_layout.tsx +++ b/app/(tabs)/_layout.tsx @@ -36,13 +36,18 @@ export default function TabLayout() { const sessionActiveRef = useRef(false); // Track if we need to restart STT after it ends during active session const shouldRestartSTTRef = useRef(false); + // Track pending transcript from interruption (to send after TTS stops) + const pendingInterruptTranscriptRef = useRef(null); // Callback for voice detection - interrupt TTS when user speaks const handleVoiceDetected = useCallback(() => { // Interrupt TTS when user starts speaking during 'speaking' state if (status === 'speaking' || isSpeaking) { - console.log('[TabLayout] Voice detected during TTS playback - interrupting'); - interruptIfSpeaking(); + console.log('[TabLayout] Voice detected during TTS playback - INTERRUPTING Julia'); + const wasInterrupted = interruptIfSpeaking(); + if (wasInterrupted) { + console.log('[TabLayout] TTS interrupted successfully, now listening to user'); + } } }, [status, isSpeaking, interruptIfSpeaking]); @@ -59,14 +64,15 @@ export default function TabLayout() { // Callback for STT results const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => { if (isFinal) { - // Only process final results when NOT speaking (avoid processing interrupted speech) - if (!isSpeaking && status !== 'speaking') { - setTranscript(transcript); - // Send to API when final result is received - sendTranscript(transcript); + // Check if we're still in speaking mode (user interrupted Julia) + if (isSpeaking || status === 'speaking') { + // Store the transcript to send after TTS fully stops + console.log('[TabLayout] Got final result while TTS playing - storing for after interruption:', transcript); + pendingInterruptTranscriptRef.current = transcript; } else { - // Got final result while speaking - this is the interruption - console.log('[TabLayout] Got final result while TTS playing - user interrupted'); + // Normal case: not speaking, send immediately + setTranscript(transcript); + sendTranscript(transcript); } } else { setPartialTranscript(transcript); @@ -126,14 +132,25 @@ export default function TabLayout() { const prevStatusRef = useRef('idle'); // Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening') + // Also process any pending transcript from user interruption useEffect(() => { const prevStatus = prevStatusRef.current; prevStatusRef.current = status; - // When transitioning from speaking to listening, restart STT + // When transitioning from speaking to listening, handle pending interrupt transcript if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) { - console.log('[TabLayout] TTS finished - auto-restarting STT'); - // Small delay to ensure TTS cleanup is complete + console.log('[TabLayout] TTS finished/interrupted - checking for pending transcript'); + + // Process pending transcript from interruption if any + const pendingTranscript = pendingInterruptTranscriptRef.current; + if (pendingTranscript) { + console.log('[TabLayout] Processing pending interrupt transcript:', pendingTranscript); + pendingInterruptTranscriptRef.current = null; + setTranscript(pendingTranscript); + sendTranscript(pendingTranscript); + } + + // Small delay to ensure TTS cleanup is complete, then restart STT const timer = setTimeout(() => { if (sessionActiveRef.current && !sttIsListening) { startListening(); @@ -141,7 +158,7 @@ export default function TabLayout() { }, 200); return () => clearTimeout(timer); } - }, [status, sttIsListening, startListening]); + }, [status, sttIsListening, startListening, setTranscript, sendTranscript]); // ============================================================================ // TAB NAVIGATION PERSISTENCE @@ -154,18 +171,20 @@ export default function TabLayout() { // Monitor and recover STT state during tab navigation // If session is active but STT stopped unexpectedly, restart it + // IMPORTANT: STT should run DURING TTS playback to detect user interruption! useEffect(() => { // Check every 500ms if STT needs to be restarted const intervalId = setInterval(() => { // Only act if session should be active (isListening from VoiceContext) - // but STT is not actually listening, and we're not in speaking/processing mode + // but STT is not actually listening + // Note: We DO want STT running during 'speaking' to detect interruption! + // Only skip during 'processing' (API call in progress) if ( sessionActiveRef.current && !sttIsListening && - status !== 'speaking' && status !== 'processing' ) { - console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped)'); + console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped, status:', status, ')'); startListening(); } }, 500); @@ -179,8 +198,9 @@ export default function TabLayout() { const handleAppStateChange = (nextAppState: AppStateStatus) => { if (nextAppState === 'active' && sessionActiveRef.current) { // App came to foreground, give it a moment then check STT + // STT should run even during 'speaking' to detect user interruption setTimeout(() => { - if (sessionActiveRef.current && !sttIsListening && status !== 'speaking' && status !== 'processing') { + if (sessionActiveRef.current && !sttIsListening && status !== 'processing') { console.log('[TabLayout] App foregrounded - restarting STT'); startListening(); }