Keep STT listening during TTS playback for interruption detection

- Add sessionActiveRef to track when voice session is active
- Add shouldRestartSTTRef to auto-restart STT after it ends
- STT now continues listening during TTS playback
- Voice detection callback checks both status and isSpeaking
- Final STT results that arrive during TTS playback are not sent to the API; they are treated only as a sign that the user interrupted
- STT automatically restarts after ending if session is still active

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Sergei 2026-01-27 16:36:08 -08:00
parent 3c7a48df5b
commit 59f1f088ed

View File

@ -1,5 +1,5 @@
import { Tabs } from 'expo-router'; import { Tabs } from 'expo-router';
import React, { useCallback, useEffect } from 'react'; import React, { useCallback, useEffect, useRef } from 'react';
import { Platform, View } from 'react-native'; import { Platform, View } from 'react-native';
import { Feather } from '@expo/vector-icons'; import { Feather } from '@expo/vector-icons';
import { useSafeAreaInsets } from 'react-native-safe-area-context'; import { useSafeAreaInsets } from 'react-native-safe-area-context';
@ -22,6 +22,7 @@ export default function TabLayout() {
// Voice context for listening mode toggle and TTS interruption // Voice context for listening mode toggle and TTS interruption
const { const {
isListening, isListening,
isSpeaking,
status, status,
startSession, startSession,
stopSession, stopSession,
@ -31,46 +32,95 @@ export default function TabLayout() {
sendTranscript, sendTranscript,
} = useVoice(); } = useVoice();
// Track whether session is active (listening mode on, even during TTS).
// Held in a ref so callbacks and timers can read the current value.
const sessionActiveRef = useRef(false);
// Track if we need to restart STT after it ends during active session.
// Set by handleSTTEnd; cleared when the restart is handled or the session stops.
const shouldRestartSTTRef = useRef(false);
// Callback for voice detection - interrupt TTS when user speaks // Callback for voice detection - interrupt TTS when user speaks
const handleVoiceDetected = useCallback(() => { const handleVoiceDetected = useCallback(() => {
// Interrupt TTS when user starts speaking during 'speaking' state // Interrupt TTS when user starts speaking during 'speaking' state
if (status === 'speaking') { if (status === 'speaking' || isSpeaking) {
console.log('[TabLayout] Voice detected during speaking - interrupting TTS'); console.log('[TabLayout] Voice detected during TTS playback - interrupting');
interruptIfSpeaking(); interruptIfSpeaking();
} }
}, [status, interruptIfSpeaking]); }, [status, isSpeaking, interruptIfSpeaking]);
// Passed to useSpeechRecognition as its onEnd callback.
// While the voice session is still active (the user did not stop it), flag
// that STT must be started again so listening continues during and after TTS
// playback. When the session is no longer active, nothing is flagged.
const handleSTTEnd = useCallback(() => {
  const sessionStillActive = sessionActiveRef.current;
  console.log('[TabLayout] STT ended, sessionActive:', sessionStillActive);
  if (!sessionStillActive) {
    return;
  }
  shouldRestartSTTRef.current = true;
}, []);
// Callback for STT results // Callback for STT results
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => { const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
if (isFinal) { if (isFinal) {
// Only process final results when NOT speaking (avoid processing interrupted speech)
if (!isSpeaking && status !== 'speaking') {
setTranscript(transcript); setTranscript(transcript);
// Send to API when final result is received // Send to API when final result is received
sendTranscript(transcript); sendTranscript(transcript);
} else {
// Got final result while speaking - this is the interruption
console.log('[TabLayout] Got final result while TTS playing - user interrupted');
}
} else { } else {
setPartialTranscript(transcript); setPartialTranscript(transcript);
} }
}, [setTranscript, setPartialTranscript, sendTranscript]); }, [setTranscript, setPartialTranscript, sendTranscript, isSpeaking, status]);
// Speech recognition with voice detection callback // Speech recognition with voice detection callback
const { const {
startListening, startListening,
stopListening, stopListening,
isListening: sttIsListening,
} = useSpeechRecognition({ } = useSpeechRecognition({
continuous: true, continuous: true,
interimResults: true, interimResults: true,
onVoiceDetected: handleVoiceDetected, onVoiceDetected: handleVoiceDetected,
onResult: handleSpeechResult, onResult: handleSpeechResult,
onEnd: handleSTTEnd,
}); });
// Mirror the session's isListening flag into sessionActiveRef whenever it
// changes. Once the session is no longer listening, drop any pending STT
// restart request as well.
useEffect(() => {
  const active = isListening;
  sessionActiveRef.current = active;
  if (active) {
    return;
  }
  shouldRestartSTTRef.current = false;
}, [isListening]);
// Start/stop STT when voice session starts/stops // Start/stop STT when voice session starts/stops
useEffect(() => { useEffect(() => {
if (isListening) { if (isListening) {
console.log('[TabLayout] Starting STT for voice session');
startListening(); startListening();
} else { } else {
console.log('[TabLayout] Stopping STT - session ended');
stopListening(); stopListening();
} }
}, [isListening, startListening, stopListening]); }, [isListening, startListening, stopListening]);
// Restart STT if it ended while session is still active.
// This ensures continuous listening even during/after TTS playback.
//
// Runs whenever sttIsListening or startListening changes. A restart is
// scheduled only when a restart was requested, the session is still active,
// and STT is not currently listening.
useEffect(() => {
  if (!shouldRestartSTTRef.current || !sessionActiveRef.current || sttIsListening) {
    return;
  }
  console.log('[TabLayout] Restarting STT - session still active');
  // Small delay to ensure clean restart
  const timer = setTimeout(() => {
    // Consume the restart request only once the timer actually fires. If this
    // effect is cleaned up first (a dependency changed within the delay), the
    // timer is cancelled but the request survives, so the next run of the
    // effect schedules the restart again instead of silently losing it.
    shouldRestartSTTRef.current = false;
    // Re-check: the session may have been stopped during the delay.
    if (sessionActiveRef.current) {
      startListening();
    }
  }, 100);
  return () => clearTimeout(timer);
}, [sttIsListening, startListening]);
// Handle voice FAB press - toggle listening mode // Handle voice FAB press - toggle listening mode
const handleVoiceFABPress = useCallback(() => { const handleVoiceFABPress = useCallback(() => {
if (isListening) { if (isListening) {