From 3c7a48df5b2a27114f14bd08ea57d4b536e11b99 Mon Sep 17 00:00:00 2001
From: Sergei
Date: Tue, 27 Jan 2026 16:34:07 -0800
Subject: [PATCH] Integrate TTS interruption in VoiceFAB when voice detected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add onVoiceDetected callback to useSpeechRecognition hook
  - Triggered on first interim result (voice activity detected)
  - Uses voiceDetectedRef to ensure callback fires only once per session
  - Reset flag on session start/end
- Connect STT to VoiceContext in _layout.tsx
  - Use useSpeechRecognition with onVoiceDetected callback
  - Call interruptIfSpeaking() when voice detected during 'speaking' state
  - Forward STT results to VoiceContext (setTranscript, sendTranscript)
  - Start/stop STT based on isListening state
- Export interruptIfSpeaking from VoiceContext provider

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 app/(tabs)/_layout.tsx        | 56 +++++++++++++++++++++++++++++++++--
 contexts/VoiceContext.tsx     |  3 ++
 hooks/useSpeechRecognition.ts | 14 +++++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx
index 007d12e..5328e39 100644
--- a/app/(tabs)/_layout.tsx
+++ b/app/(tabs)/_layout.tsx
@@ -1,5 +1,5 @@
 import { Tabs } from 'expo-router';
-import React, { useCallback } from 'react';
+import React, { useCallback, useEffect } from 'react';
 import { Platform, View } from 'react-native';
 import { Feather } from '@expo/vector-icons';
 import { useSafeAreaInsets } from 'react-native-safe-area-context';
@@ -10,6 +10,7 @@ import { AppColors } from '@/constants/theme';
 import { useColorScheme } from '@/hooks/use-color-scheme';
 import { useVoiceCall } from '@/contexts/VoiceCallContext';
 import { useVoice } from '@/contexts/VoiceContext';
+import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
 
 export default function TabLayout() {
   const colorScheme = useColorScheme();
@@ -18,8 +19,57 @@ export default function TabLayout() {
   // VoiceFAB uses VoiceCallContext internally to hide when call is active
   useVoiceCall(); // Ensure context is available
 
-  // Voice context for listening mode toggle
-  const { isListening, startSession, stopSession } = useVoice();
+  // Voice context for listening mode toggle and TTS interruption
+  const {
+    isListening,
+    status,
+    startSession,
+    stopSession,
+    interruptIfSpeaking,
+    setTranscript,
+    setPartialTranscript,
+    sendTranscript,
+  } = useVoice();
+
+  // Callback for voice detection - interrupt TTS when user speaks
+  const handleVoiceDetected = useCallback(() => {
+    // Interrupt TTS when user starts speaking during 'speaking' state
+    if (status === 'speaking') {
+      console.log('[TabLayout] Voice detected during speaking - interrupting TTS');
+      interruptIfSpeaking();
+    }
+  }, [status, interruptIfSpeaking]);
+
+  // Callback for STT results
+  const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
+    if (isFinal) {
+      setTranscript(transcript);
+      // Send to API when final result is received
+      sendTranscript(transcript);
+    } else {
+      setPartialTranscript(transcript);
+    }
+  }, [setTranscript, setPartialTranscript, sendTranscript]);
+
+  // Speech recognition with voice detection callback
+  const {
+    startListening,
+    stopListening,
+  } = useSpeechRecognition({
+    continuous: true,
+    interimResults: true,
+    onVoiceDetected: handleVoiceDetected,
+    onResult: handleSpeechResult,
+  });
+
+  // Start/stop STT when voice session starts/stops
+  useEffect(() => {
+    if (isListening) {
+      startListening();
+    } else {
+      stopListening();
+    }
+  }, [isListening, startListening, stopListening]);
 
   // Handle voice FAB press - toggle listening mode
   const handleVoiceFABPress = useCallback(() => {
diff --git a/contexts/VoiceContext.tsx b/contexts/VoiceContext.tsx
index f7b0c47..a32bbb6 100644
--- a/contexts/VoiceContext.tsx
+++ b/contexts/VoiceContext.tsx
@@ -131,6 +131,8 @@ interface VoiceContextValue {
   speak: (text: string) => Promise<void>;
   // Stop TTS
   stopSpeaking: () => void;
+  // Interrupt TTS if speaking (call when user starts talking)
+  interruptIfSpeaking: () => boolean;
 }
 
 const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
@@ -381,6 +383,7 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
         setIsSpeaking,
         speak,
         stopSpeaking,
+        interruptIfSpeaking,
       }}
     >
       {children}
diff --git a/hooks/useSpeechRecognition.ts b/hooks/useSpeechRecognition.ts
index 987b22a..a84501f 100644
--- a/hooks/useSpeechRecognition.ts
+++ b/hooks/useSpeechRecognition.ts
@@ -42,6 +42,8 @@ export interface UseSpeechRecognitionOptions {
   onStart?: () => void;
   /** Callback when speech recognition ends */
   onEnd?: () => void;
+  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
+  onVoiceDetected?: () => void;
 }
 
 export interface UseSpeechRecognitionReturn {
@@ -77,6 +79,7 @@ export function useSpeechRecognition(
     onError,
     onStart,
     onEnd,
+    onVoiceDetected,
   } = options;
 
   const [isListening, setIsListening] = useState(false);
@@ -87,6 +90,8 @@ export function useSpeechRecognition(
 
   // Track if we're in the middle of starting to prevent double-starts
   const isStartingRef = useRef(false);
+  // Track if voice has been detected in current session (for onVoiceDetected callback)
+  const voiceDetectedRef = useRef(false);
 
   // Check availability on mount
   useEffect(() => {
@@ -111,6 +116,7 @@ export function useSpeechRecognition(
       setIsListening(true);
       setError(null);
       isStartingRef.current = false;
+      voiceDetectedRef.current = false; // Reset voice detection flag for new session
       onStart?.();
     });
 
@@ -120,6 +126,7 @@ export function useSpeechRecognition(
       setIsListening(false);
       setPartialTranscript('');
       isStartingRef.current = false;
+      voiceDetectedRef.current = false; // Reset for next session
       onEnd?.();
     });
 
@@ -133,6 +140,13 @@ export function useSpeechRecognition(
 
       console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);
 
+      // Trigger onVoiceDetected on first result (voice activity detected)
+      if (!voiceDetectedRef.current && transcript.length > 0) {
+        voiceDetectedRef.current = true;
+        console.log('[SpeechRecognition] Voice activity detected');
+        onVoiceDetected?.();
+      }
+
      if (isFinal) {
         setRecognizedText(transcript);
         setPartialTranscript('');
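
A note on interruptIfSpeaking: the hunks above add it to the VoiceContextValue interface and the provider value, but its function body falls outside the diff context. A minimal sketch of the shape it could take inside VoiceProvider, assuming it composes the provider's existing isSpeaking state and stopSpeaking action (setIsSpeaking and stopSpeaking appear in the hunks; the body itself is hypothetical):

```typescript
// Hypothetical sketch -- not part of this patch. Assumes the provider already
// tracks isSpeaking state and exposes a stopSpeaking action, as the hunks
// above suggest.
const interruptIfSpeaking = useCallback((): boolean => {
  if (!isSpeaking) {
    return false; // nothing playing; no-op
  }
  stopSpeaking(); // halt TTS playback immediately
  return true;    // report that an utterance was cut off
}, [isSpeaking, stopSpeaking]);
```

The boolean return matches the `interruptIfSpeaking: () => boolean` signature added to the interface, letting a caller such as handleVoiceDetected in _layout.tsx distinguish a real barge-in from a no-op.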