Allow user to interrupt Julia voice by speaking
- Enable STT listening during TTS playback to detect user interruption
- When voice detected during Julia's speech, immediately stop TTS
- Store interrupted transcript and process it after TTS stops
- Remove 'speaking' status check from STT watchdog to allow parallel STT+TTS
- Add pending transcript mechanism to handle race condition between TTS stop and STT final result

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
bdb4ceb8d2
commit
3ef1d8e54c
@ -36,13 +36,18 @@ export default function TabLayout() {
|
|||||||
const sessionActiveRef = useRef(false);
|
const sessionActiveRef = useRef(false);
|
||||||
// Track if we need to restart STT after it ends during active session
|
// Track if we need to restart STT after it ends during active session
|
||||||
const shouldRestartSTTRef = useRef(false);
|
const shouldRestartSTTRef = useRef(false);
|
||||||
|
// Track pending transcript from interruption (to send after TTS stops)
|
||||||
|
const pendingInterruptTranscriptRef = useRef<string | null>(null);
|
||||||
|
|
||||||
// Callback for voice detection - interrupt TTS when user speaks
|
// Callback for voice detection - interrupt TTS when user speaks
|
||||||
const handleVoiceDetected = useCallback(() => {
|
const handleVoiceDetected = useCallback(() => {
|
||||||
// Interrupt TTS when user starts speaking during 'speaking' state
|
// Interrupt TTS when user starts speaking during 'speaking' state
|
||||||
if (status === 'speaking' || isSpeaking) {
|
if (status === 'speaking' || isSpeaking) {
|
||||||
console.log('[TabLayout] Voice detected during TTS playback - interrupting');
|
console.log('[TabLayout] Voice detected during TTS playback - INTERRUPTING Julia');
|
||||||
interruptIfSpeaking();
|
const wasInterrupted = interruptIfSpeaking();
|
||||||
|
if (wasInterrupted) {
|
||||||
|
console.log('[TabLayout] TTS interrupted successfully, now listening to user');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}, [status, isSpeaking, interruptIfSpeaking]);
|
}, [status, isSpeaking, interruptIfSpeaking]);
|
||||||
|
|
||||||
@ -59,14 +64,15 @@ export default function TabLayout() {
|
|||||||
// Callback for STT results
|
// Callback for STT results
|
||||||
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
|
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
|
||||||
if (isFinal) {
|
if (isFinal) {
|
||||||
// Only process final results when NOT speaking (avoid processing interrupted speech)
|
// Check if we're still in speaking mode (user interrupted Julia)
|
||||||
if (!isSpeaking && status !== 'speaking') {
|
if (isSpeaking || status === 'speaking') {
|
||||||
setTranscript(transcript);
|
// Store the transcript to send after TTS fully stops
|
||||||
// Send to API when final result is received
|
console.log('[TabLayout] Got final result while TTS playing - storing for after interruption:', transcript);
|
||||||
sendTranscript(transcript);
|
pendingInterruptTranscriptRef.current = transcript;
|
||||||
} else {
|
} else {
|
||||||
// Got final result while speaking - this is the interruption
|
// Normal case: not speaking, send immediately
|
||||||
console.log('[TabLayout] Got final result while TTS playing - user interrupted');
|
setTranscript(transcript);
|
||||||
|
sendTranscript(transcript);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
setPartialTranscript(transcript);
|
setPartialTranscript(transcript);
|
||||||
@ -126,14 +132,25 @@ export default function TabLayout() {
|
|||||||
const prevStatusRef = useRef<typeof status>('idle');
|
const prevStatusRef = useRef<typeof status>('idle');
|
||||||
|
|
||||||
// Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening')
|
// Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening')
|
||||||
|
// Also process any pending transcript from user interruption
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const prevStatus = prevStatusRef.current;
|
const prevStatus = prevStatusRef.current;
|
||||||
prevStatusRef.current = status;
|
prevStatusRef.current = status;
|
||||||
|
|
||||||
// When transitioning from speaking to listening, restart STT
|
// When transitioning from speaking to listening, handle pending interrupt transcript
|
||||||
if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) {
|
if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) {
|
||||||
console.log('[TabLayout] TTS finished - auto-restarting STT');
|
console.log('[TabLayout] TTS finished/interrupted - checking for pending transcript');
|
||||||
// Small delay to ensure TTS cleanup is complete
|
|
||||||
|
// Process pending transcript from interruption if any
|
||||||
|
const pendingTranscript = pendingInterruptTranscriptRef.current;
|
||||||
|
if (pendingTranscript) {
|
||||||
|
console.log('[TabLayout] Processing pending interrupt transcript:', pendingTranscript);
|
||||||
|
pendingInterruptTranscriptRef.current = null;
|
||||||
|
setTranscript(pendingTranscript);
|
||||||
|
sendTranscript(pendingTranscript);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Small delay to ensure TTS cleanup is complete, then restart STT
|
||||||
const timer = setTimeout(() => {
|
const timer = setTimeout(() => {
|
||||||
if (sessionActiveRef.current && !sttIsListening) {
|
if (sessionActiveRef.current && !sttIsListening) {
|
||||||
startListening();
|
startListening();
|
||||||
@ -141,7 +158,7 @@ export default function TabLayout() {
|
|||||||
}, 200);
|
}, 200);
|
||||||
return () => clearTimeout(timer);
|
return () => clearTimeout(timer);
|
||||||
}
|
}
|
||||||
}, [status, sttIsListening, startListening]);
|
}, [status, sttIsListening, startListening, setTranscript, sendTranscript]);
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// TAB NAVIGATION PERSISTENCE
|
// TAB NAVIGATION PERSISTENCE
|
||||||
@ -154,18 +171,20 @@ export default function TabLayout() {
|
|||||||
|
|
||||||
// Monitor and recover STT state during tab navigation
|
// Monitor and recover STT state during tab navigation
|
||||||
// If session is active but STT stopped unexpectedly, restart it
|
// If session is active but STT stopped unexpectedly, restart it
|
||||||
|
// IMPORTANT: STT should run DURING TTS playback to detect user interruption!
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
// Check every 500ms if STT needs to be restarted
|
// Check every 500ms if STT needs to be restarted
|
||||||
const intervalId = setInterval(() => {
|
const intervalId = setInterval(() => {
|
||||||
// Only act if session should be active (isListening from VoiceContext)
|
// Only act if session should be active (isListening from VoiceContext)
|
||||||
// but STT is not actually listening, and we're not in speaking/processing mode
|
// but STT is not actually listening
|
||||||
|
// Note: We DO want STT running during 'speaking' to detect interruption!
|
||||||
|
// Only skip during 'processing' (API call in progress)
|
||||||
if (
|
if (
|
||||||
sessionActiveRef.current &&
|
sessionActiveRef.current &&
|
||||||
!sttIsListening &&
|
!sttIsListening &&
|
||||||
status !== 'speaking' &&
|
|
||||||
status !== 'processing'
|
status !== 'processing'
|
||||||
) {
|
) {
|
||||||
console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped)');
|
console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped, status:', status, ')');
|
||||||
startListening();
|
startListening();
|
||||||
}
|
}
|
||||||
}, 500);
|
}, 500);
|
||||||
@ -179,8 +198,9 @@ export default function TabLayout() {
|
|||||||
const handleAppStateChange = (nextAppState: AppStateStatus) => {
|
const handleAppStateChange = (nextAppState: AppStateStatus) => {
|
||||||
if (nextAppState === 'active' && sessionActiveRef.current) {
|
if (nextAppState === 'active' && sessionActiveRef.current) {
|
||||||
// App came to foreground, give it a moment then check STT
|
// App came to foreground, give it a moment then check STT
|
||||||
|
// STT should run even during 'speaking' to detect user interruption
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
if (sessionActiveRef.current && !sttIsListening && status !== 'speaking' && status !== 'processing') {
|
if (sessionActiveRef.current && !sttIsListening && status !== 'processing') {
|
||||||
console.log('[TabLayout] App foregrounded - restarting STT');
|
console.log('[TabLayout] App foregrounded - restarting STT');
|
||||||
startListening();
|
startListening();
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user