From 3c7a48df5b2a27114f14bd08ea57d4b536e11b99 Mon Sep 17 00:00:00 2001
From: Sergei
Date: Tue, 27 Jan 2026 16:34:07 -0800
Subject: [PATCH] Integrate TTS interruption in VoiceFAB when voice detected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add onVoiceDetected callback to useSpeechRecognition hook
  - Triggered on first interim result (voice activity detected)
  - Uses voiceDetectedRef to ensure callback fires only once per session
  - Reset flag on session start/end
- Connect STT to VoiceContext in _layout.tsx
  - Use useSpeechRecognition with onVoiceDetected callback
  - Call interruptIfSpeaking() when voice detected during 'speaking' state
  - Forward STT results to VoiceContext (setTranscript, sendTranscript)
  - Start/stop STT based on isListening state
- Export interruptIfSpeaking from VoiceContext provider

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 app/(tabs)/_layout.tsx        | 56 +++++++++++++++++++++++++++++++++--
 contexts/VoiceContext.tsx     |  3 ++
 hooks/useSpeechRecognition.ts | 14 +++++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx
index 007d12e..5328e39 100644
--- a/app/(tabs)/_layout.tsx
+++ b/app/(tabs)/_layout.tsx
@@ -1,5 +1,5 @@
 import { Tabs } from 'expo-router';
-import React, { useCallback } from 'react';
+import React, { useCallback, useEffect } from 'react';
 import { Platform, View } from 'react-native';
 import { Feather } from '@expo/vector-icons';
 import { useSafeAreaInsets } from 'react-native-safe-area-context';
@@ -10,6 +10,7 @@ import { AppColors } from '@/constants/theme';
 import { useColorScheme } from '@/hooks/use-color-scheme';
 import { useVoiceCall } from '@/contexts/VoiceCallContext';
 import { useVoice } from '@/contexts/VoiceContext';
+import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
 
 export default function TabLayout() {
   const colorScheme = useColorScheme();
@@ -18,8 +19,57 @@ export default function TabLayout() {
   // VoiceFAB uses VoiceCallContext internally to hide when call is active
   useVoiceCall(); // Ensure context is available
 
-  // Voice context for listening mode toggle
-  const { isListening, startSession, stopSession } = useVoice();
+  // Voice context for listening mode toggle and TTS interruption
+  const {
+    isListening,
+    status,
+    startSession,
+    stopSession,
+    interruptIfSpeaking,
+    setTranscript,
+    setPartialTranscript,
+    sendTranscript,
+  } = useVoice();
+
+  // Callback for voice detection - interrupt TTS when user speaks
+  const handleVoiceDetected = useCallback(() => {
+    // Interrupt TTS when user starts speaking during 'speaking' state
+    if (status === 'speaking') {
+      console.log('[TabLayout] Voice detected during speaking - interrupting TTS');
+      interruptIfSpeaking();
+    }
+  }, [status, interruptIfSpeaking]);
+
+  // Callback for STT results
+  const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
+    if (isFinal) {
+      setTranscript(transcript);
+      // Send to API when final result is received
+      sendTranscript(transcript);
+    } else {
+      setPartialTranscript(transcript);
+    }
+  }, [setTranscript, setPartialTranscript, sendTranscript]);
+
+  // Speech recognition with voice detection callback
+  const {
+    startListening,
+    stopListening,
+  } = useSpeechRecognition({
+    continuous: true,
+    interimResults: true,
+    onVoiceDetected: handleVoiceDetected,
+    onResult: handleSpeechResult,
+  });
+
+  // Start/stop STT when voice session starts/stops
+  useEffect(() => {
+    if (isListening) {
+      startListening();
+    } else {
+      stopListening();
+    }
+  }, [isListening, startListening, stopListening]);
 
   // Handle voice FAB press - toggle listening mode
   const handleVoiceFABPress = useCallback(() => {
diff --git a/contexts/VoiceContext.tsx b/contexts/VoiceContext.tsx
index f7b0c47..a32bbb6 100644
--- a/contexts/VoiceContext.tsx
+++ b/contexts/VoiceContext.tsx
@@ -131,6 +131,8 @@ interface VoiceContextValue {
   speak: (text: string) => Promise<void>;
   // Stop TTS
   stopSpeaking: () => void;
+  // Interrupt TTS if speaking (call when user starts talking)
+  interruptIfSpeaking: () => boolean;
 }
 
 const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
@@ -381,6 +383,7 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
         setIsSpeaking,
         speak,
         stopSpeaking,
+        interruptIfSpeaking,
       }}
     >
       {children}
diff --git a/hooks/useSpeechRecognition.ts b/hooks/useSpeechRecognition.ts
index 987b22a..a84501f 100644
--- a/hooks/useSpeechRecognition.ts
+++ b/hooks/useSpeechRecognition.ts
@@ -42,6 +42,8 @@ export interface UseSpeechRecognitionOptions {
   onStart?: () => void;
   /** Callback when speech recognition ends */
   onEnd?: () => void;
+  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
+  onVoiceDetected?: () => void;
 }
 
 export interface UseSpeechRecognitionReturn {
@@ -77,6 +79,7 @@ export function useSpeechRecognition(
     onError,
     onStart,
     onEnd,
+    onVoiceDetected,
   } = options;
 
   const [isListening, setIsListening] = useState(false);
@@ -87,6 +90,8 @@ export function useSpeechRecognition(
 
   // Track if we're in the middle of starting to prevent double-starts
   const isStartingRef = useRef(false);
+  // Track if voice has been detected in current session (for onVoiceDetected callback)
+  const voiceDetectedRef = useRef(false);
 
   // Check availability on mount
   useEffect(() => {
@@ -111,6 +116,7 @@ export function useSpeechRecognition(
       setIsListening(true);
       setError(null);
       isStartingRef.current = false;
+      voiceDetectedRef.current = false; // Reset voice detection flag for new session
       onStart?.();
     });
 
@@ -120,6 +126,7 @@ export function useSpeechRecognition(
       setIsListening(false);
       setPartialTranscript('');
       isStartingRef.current = false;
+      voiceDetectedRef.current = false; // Reset for next session
       onEnd?.();
     });
 
@@ -133,6 +140,13 @@ export function useSpeechRecognition(
 
       console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);
 
+      // Trigger onVoiceDetected on first result (voice activity detected)
+      if (!voiceDetectedRef.current && transcript.length > 0) {
+        voiceDetectedRef.current = true;
+        console.log('[SpeechRecognition] Voice activity detected');
+        onVoiceDetected?.();
+      }
+
      if (isFinal) {
         setRecognizedText(transcript);
         setPartialTranscript('');
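
A note on interruptIfSpeaking: the hunks above add it to the VoiceContextValue interface and the provider value, but its function body falls outside the diff context. A minimal sketch of the shape it could take inside VoiceProvider, assuming it composes the provider's existing isSpeaking state and stopSpeaking action (setIsSpeaking and stopSpeaking appear in the hunks; the body itself is hypothetical):

```typescript
// Hypothetical sketch -- not part of this patch. Assumes the provider already
// tracks isSpeaking state and exposes a stopSpeaking action, as the hunks
// above suggest.
const interruptIfSpeaking = useCallback((): boolean => {
  if (!isSpeaking) {
    return false; // nothing playing; no-op
  }
  stopSpeaking(); // halt TTS playback immediately
  return true;    // report that an utterance was cut off
}, [isSpeaking, stopSpeaking]);
```

The boolean return matches the `interruptIfSpeaking: () => boolean` signature added to the interface, letting a caller such as handleVoiceDetected in _layout.tsx distinguish a real barge-in from a no-op.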