Allow user to interrupt Julia's voice by speaking

- Enable STT listening during TTS playback to detect user interruption
- When voice is detected during Julia's speech, immediately stop TTS
- Store the user's interrupting transcript and process it after TTS stops
- Remove 'speaking' status check from STT watchdog and app-foreground handler to allow parallel STT+TTS
- Add pending transcript mechanism to handle race condition between
  TTS stop and STT final result

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Sergei 2026-01-27 16:49:19 -08:00
parent bdb4ceb8d2
commit 3ef1d8e54c

View File

@ -36,13 +36,18 @@ export default function TabLayout() {
const sessionActiveRef = useRef(false); const sessionActiveRef = useRef(false);
// Track if we need to restart STT after it ends during active session // Track if we need to restart STT after it ends during active session
const shouldRestartSTTRef = useRef(false); const shouldRestartSTTRef = useRef(false);
// Track pending transcript from interruption (to send after TTS stops)
const pendingInterruptTranscriptRef = useRef<string | null>(null);
// Callback for voice detection - interrupt TTS when user speaks // Callback for voice detection - interrupt TTS when user speaks
const handleVoiceDetected = useCallback(() => { const handleVoiceDetected = useCallback(() => {
// Interrupt TTS when user starts speaking during 'speaking' state // Interrupt TTS when user starts speaking during 'speaking' state
if (status === 'speaking' || isSpeaking) { if (status === 'speaking' || isSpeaking) {
console.log('[TabLayout] Voice detected during TTS playback - interrupting'); console.log('[TabLayout] Voice detected during TTS playback - INTERRUPTING Julia');
interruptIfSpeaking(); const wasInterrupted = interruptIfSpeaking();
if (wasInterrupted) {
console.log('[TabLayout] TTS interrupted successfully, now listening to user');
}
} }
}, [status, isSpeaking, interruptIfSpeaking]); }, [status, isSpeaking, interruptIfSpeaking]);
@ -59,14 +64,15 @@ export default function TabLayout() {
// Callback for STT results // Callback for STT results
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => { const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
if (isFinal) { if (isFinal) {
// Only process final results when NOT speaking (avoid processing interrupted speech) // Check if we're still in speaking mode (user interrupted Julia)
if (!isSpeaking && status !== 'speaking') { if (isSpeaking || status === 'speaking') {
setTranscript(transcript); // Store the transcript to send after TTS fully stops
// Send to API when final result is received console.log('[TabLayout] Got final result while TTS playing - storing for after interruption:', transcript);
sendTranscript(transcript); pendingInterruptTranscriptRef.current = transcript;
} else { } else {
// Got final result while speaking - this is the interruption // Normal case: not speaking, send immediately
console.log('[TabLayout] Got final result while TTS playing - user interrupted'); setTranscript(transcript);
sendTranscript(transcript);
} }
} else { } else {
setPartialTranscript(transcript); setPartialTranscript(transcript);
@ -126,14 +132,25 @@ export default function TabLayout() {
const prevStatusRef = useRef<typeof status>('idle'); const prevStatusRef = useRef<typeof status>('idle');
// Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening') // Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening')
// Also process any pending transcript from user interruption
useEffect(() => { useEffect(() => {
const prevStatus = prevStatusRef.current; const prevStatus = prevStatusRef.current;
prevStatusRef.current = status; prevStatusRef.current = status;
// When transitioning from speaking to listening, restart STT // When transitioning from speaking to listening, handle pending interrupt transcript
if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) { if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) {
console.log('[TabLayout] TTS finished - auto-restarting STT'); console.log('[TabLayout] TTS finished/interrupted - checking for pending transcript');
// Small delay to ensure TTS cleanup is complete
// Process pending transcript from interruption if any
const pendingTranscript = pendingInterruptTranscriptRef.current;
if (pendingTranscript) {
console.log('[TabLayout] Processing pending interrupt transcript:', pendingTranscript);
pendingInterruptTranscriptRef.current = null;
setTranscript(pendingTranscript);
sendTranscript(pendingTranscript);
}
// Small delay to ensure TTS cleanup is complete, then restart STT
const timer = setTimeout(() => { const timer = setTimeout(() => {
if (sessionActiveRef.current && !sttIsListening) { if (sessionActiveRef.current && !sttIsListening) {
startListening(); startListening();
@ -141,7 +158,7 @@ export default function TabLayout() {
}, 200); }, 200);
return () => clearTimeout(timer); return () => clearTimeout(timer);
} }
}, [status, sttIsListening, startListening]); }, [status, sttIsListening, startListening, setTranscript, sendTranscript]);
// ============================================================================ // ============================================================================
// TAB NAVIGATION PERSISTENCE // TAB NAVIGATION PERSISTENCE
@ -154,18 +171,20 @@ export default function TabLayout() {
// Monitor and recover STT state during tab navigation // Monitor and recover STT state during tab navigation
// If session is active but STT stopped unexpectedly, restart it // If session is active but STT stopped unexpectedly, restart it
// IMPORTANT: STT should run DURING TTS playback to detect user interruption!
useEffect(() => { useEffect(() => {
// Check every 500ms if STT needs to be restarted // Check every 500ms if STT needs to be restarted
const intervalId = setInterval(() => { const intervalId = setInterval(() => {
// Only act if session should be active (isListening from VoiceContext) // Only act if session should be active (isListening from VoiceContext)
// but STT is not actually listening, and we're not in speaking/processing mode // but STT is not actually listening
// Note: We DO want STT running during 'speaking' to detect interruption!
// Only skip during 'processing' (API call in progress)
if ( if (
sessionActiveRef.current && sessionActiveRef.current &&
!sttIsListening && !sttIsListening &&
status !== 'speaking' &&
status !== 'processing' status !== 'processing'
) { ) {
console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped)'); console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped, status:', status, ')');
startListening(); startListening();
} }
}, 500); }, 500);
@ -179,8 +198,9 @@ export default function TabLayout() {
const handleAppStateChange = (nextAppState: AppStateStatus) => { const handleAppStateChange = (nextAppState: AppStateStatus) => {
if (nextAppState === 'active' && sessionActiveRef.current) { if (nextAppState === 'active' && sessionActiveRef.current) {
// App came to foreground, give it a moment then check STT // App came to foreground, give it a moment then check STT
// STT should run even during 'speaking' to detect user interruption
setTimeout(() => { setTimeout(() => {
if (sessionActiveRef.current && !sttIsListening && status !== 'speaking' && status !== 'processing') { if (sessionActiveRef.current && !sttIsListening && status !== 'processing') {
console.log('[TabLayout] App foregrounded - restarting STT'); console.log('[TabLayout] App foregrounded - restarting STT');
startListening(); startListening();
} }