wellnua-lite/hooks/useSpeechRecognition.ts
Sergei 05f872d067 fix: voice session improvements - FAB stop, echo prevention, chat TTS
- FAB button now correctly stops session during speaking/processing states
- Echo prevention: STT stopped during TTS playback, results ignored during speaking
- Chat TTS only speaks when voice session is active (no auto-speak for text chat)
- Session stop now aborts in-flight API requests and prevents race conditions
- STT restarts after TTS with 800ms delay for audio focus release
- Pending interrupt transcript processed after TTS completion
- ChatContext added for message persistence across tab navigation
- VoiceFAB redesigned with state-based animations
- console.error replaced with console.warn across voice pipeline
- no-speech STT errors silenced (normal silence behavior)
2026-01-27 22:59:55 -08:00
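
The echo-prevention and delayed-restart behavior listed in the commit lives in the voice session layer rather than in this hook, but it can be sketched against the hook's public API. In the sketch below, the `speak` TTS wrapper, the helper name, and the orchestration flow are assumptions for illustration; only `stopListening`/`startListening` and the 800 ms delay figure come from this repository.

```typescript
// Hypothetical orchestration sketch (not the actual session/VoiceFAB code):
// stop STT before TTS starts so the recognizer does not hear the assistant,
// then restart listening after a short delay once audio focus is released.
const RESTART_DELAY_MS = 800; // delay mentioned in the commit message

async function speakWithoutEcho(
  text: string,
  stt: { stopListening: () => void; startListening: () => Promise<boolean> },
  speak: (text: string) => Promise<void> // assumed TTS wrapper
): Promise<void> {
  stt.stopListening();         // echo prevention: no STT while TTS plays
  await speak(text);           // play the assistant reply
  await new Promise((resolve) => setTimeout(resolve, RESTART_DELAY_MS));
  await stt.startListening();  // resume listening for the user
}
```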

/**
 * Speech Recognition Hook
 *
 * Wraps @jamsch/expo-speech-recognition for easy use in components.
 * Provides start/stop controls, recognized text, and status states.
 *
 * NOTE: Gracefully handles missing native module (Expo Go)
 * - In Expo Go: isAvailable = false, all methods are no-ops
 * - In Dev Build: Full functionality
 *
 * Usage:
 * ```typescript
 * const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
 *
 * // Start listening (will request permissions if needed)
 * await startListening();
 *
 * // Stop and get final result
 * stopListening();
 *
 * // recognizedText contains the transcript
 * ```
 */
import { useState, useCallback, useRef, useEffect } from 'react';
import { Platform } from 'react-native';

// Try to import the native module - may fail in Expo Go
let ExpoSpeechRecognitionModule: any = null;
let useSpeechRecognitionEvent: any = () => {}; // no-op by default

try {
  const speechRecognition = require('@jamsch/expo-speech-recognition');
  ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
  useSpeechRecognitionEvent = speechRecognition.useSpeechRecognitionEvent;
} catch (e) {
  console.warn('[SpeechRecognition] Native module not available (Expo Go?). Speech recognition disabled.');
}
export interface UseSpeechRecognitionOptions {
  /** Language for recognition (default: 'en-US') */
  lang?: string;
  /** Whether to return interim results while speaking (default: true) */
  interimResults?: boolean;
  /** Whether to continue listening after pause (default: false) */
  continuous?: boolean;
  /** Whether to add punctuation (iOS only, default: true) */
  addsPunctuation?: boolean;
  /** Callback when speech recognition result is available */
  onResult?: (transcript: string, isFinal: boolean) => void;
  /** Callback when an error occurs */
  onError?: (error: string) => void;
  /** Callback when speech recognition starts */
  onStart?: () => void;
  /** Callback when speech recognition ends */
  onEnd?: () => void;
  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
  onVoiceDetected?: () => void;
}

export interface UseSpeechRecognitionReturn {
  /** Start listening for speech */
  startListening: () => Promise<boolean>;
  /** Stop listening and finalize result */
  stopListening: () => void;
  /** Abort listening without processing */
  abortListening: () => void;
  /** Whether currently listening */
  isListening: boolean;
  /** Whether speech recognition is available on this device */
  isAvailable: boolean;
  /** Current recognized text (updates in real-time if interimResults=true) */
  recognizedText: string;
  /** Partial transcript (interim result, not final) */
  partialTranscript: string;
  /** Error message if any */
  error: string | null;
  /** Clear the recognized text and error */
  reset: () => void;
}
export function useSpeechRecognition(
  options: UseSpeechRecognitionOptions = {}
): UseSpeechRecognitionReturn {
  const {
    lang = 'en-US',
    interimResults = true,
    continuous = false,
    addsPunctuation = true,
    onResult,
    onError,
    onStart,
    onEnd,
    onVoiceDetected,
  } = options;

  const [isListening, setIsListening] = useState(false);
  const [isAvailable, setIsAvailable] = useState(!!ExpoSpeechRecognitionModule);
  const [recognizedText, setRecognizedText] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [error, setError] = useState<string | null>(null);

  // Track if we're in the middle of starting to prevent double-starts
  const isStartingRef = useRef(false);
  // Track if voice has been detected in current session (for onVoiceDetected callback)
  const voiceDetectedRef = useRef(false);

  // Check availability on mount
  useEffect(() => {
    if (!ExpoSpeechRecognitionModule) {
      setIsAvailable(false);
      return;
    }

    const checkAvailability = async () => {
      try {
        // Check if we can get permissions (indirect availability check)
        const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
        // If we can query permissions, the module is available
        setIsAvailable(true);
        console.log('[SpeechRecognition] Available, permission status:', status.status);
      } catch (err) {
        console.warn('[SpeechRecognition] Not available:', err);
        setIsAvailable(false);
      }
    };

    checkAvailability();
  }, []);
  // Event: Recognition started
  useSpeechRecognitionEvent('start', () => {
    console.log('[SpeechRecognition] Started');
    setIsListening(true);
    setError(null);
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset voice detection flag for new session
    onStart?.();
  });

  // Event: Recognition ended
  useSpeechRecognitionEvent('end', () => {
    console.log('[SpeechRecognition] Ended');
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset for next session
    onEnd?.();
  });

  // Event: Result available
  useSpeechRecognitionEvent('result', (event: any) => {
    const results = event.results;
    if (results && results.length > 0) {
      const result = results[results.length - 1];
      const transcript = result?.transcript || '';
      const isFinal = event.isFinal ?? false;
      console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);

      // Trigger onVoiceDetected on first result (voice activity detected)
      if (!voiceDetectedRef.current && transcript.length > 0) {
        voiceDetectedRef.current = true;
        console.log('[SpeechRecognition] Voice activity detected');
        onVoiceDetected?.();
      }

      if (isFinal) {
        setRecognizedText(transcript);
        setPartialTranscript('');
      } else {
        setPartialTranscript(transcript);
      }

      onResult?.(transcript, isFinal);
    }
  });

  // Event: Error occurred
  useSpeechRecognitionEvent('error', (event: any) => {
    const errorCode = event.error || '';
    const errorMessage = event.message || errorCode || 'Speech recognition error';

    // "no-speech" is normal when user is silent — ignore completely
    if (errorCode === 'no-speech') {
      console.log('[SpeechRecognition] No speech detected (silence) - ignoring');
      setIsListening(false);
      isStartingRef.current = false;
      return;
    }

    console.warn('[SpeechRecognition] Error:', errorMessage);
    setError(errorMessage);
    onError?.(errorMessage);
    setIsListening(false);
    isStartingRef.current = false;
  });
  /**
   * Start listening for speech
   * @returns true if started successfully, false otherwise
   */
  const startListening = useCallback(async (): Promise<boolean> => {
    if (!ExpoSpeechRecognitionModule) {
      console.warn('[SpeechRecognition] Cannot start - native module not available');
      return false;
    }
    if (isListening || isStartingRef.current) {
      console.log('[SpeechRecognition] Already listening or starting');
      return false;
    }
    if (!isAvailable) {
      const msg = 'Speech recognition is not available on this device';
      console.warn('[SpeechRecognition]', msg);
      setError(msg);
      onError?.(msg);
      return false;
    }

    isStartingRef.current = true;
    setError(null);
    setRecognizedText('');
    setPartialTranscript('');

    try {
      // Request permissions
      const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
      if (!permissionResult.granted) {
        const msg = 'Microphone permission denied';
        console.warn('[SpeechRecognition]', msg);
        setError(msg);
        onError?.(msg);
        isStartingRef.current = false;
        return false;
      }

      console.log('[SpeechRecognition] Starting with lang:', lang);

      // Start recognition
      ExpoSpeechRecognitionModule.start({
        lang,
        interimResults,
        continuous,
        addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
        // Android-specific: longer silence timeout for more natural pauses
        androidIntentOptions: Platform.OS === 'android' ? {
          EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
          EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
        } : undefined,
      });

      return true;
    } catch (err) {
      const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
      console.warn('[SpeechRecognition] Start error:', msg);
      setError(msg);
      onError?.(msg);
      isStartingRef.current = false;
      return false;
    }
  }, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);
  /**
   * Stop listening and process final result
   */
  const stopListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;
    if (!isListening && !isStartingRef.current) {
      console.log('[SpeechRecognition] Not listening, nothing to stop');
      return;
    }
    console.log('[SpeechRecognition] Stopping...');
    try {
      ExpoSpeechRecognitionModule.stop();
    } catch (err) {
      console.warn('[SpeechRecognition] Stop error:', err);
    }
  }, [isListening]);

  /**
   * Abort listening without processing
   */
  const abortListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;
    if (!isListening && !isStartingRef.current) {
      return;
    }
    console.log('[SpeechRecognition] Aborting...');
    try {
      ExpoSpeechRecognitionModule.abort();
    } catch (err) {
      console.warn('[SpeechRecognition] Abort error:', err);
    }
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
  }, [isListening]);

  /**
   * Reset state
   */
  const reset = useCallback(() => {
    setRecognizedText('');
    setPartialTranscript('');
    setError(null);
  }, []);

  return {
    startListening,
    stopListening,
    abortListening,
    isListening,
    isAvailable,
    recognizedText,
    partialTranscript,
    error,
    reset,
  };
}
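
For reference, a minimal usage sketch of the callback options documented in `UseSpeechRecognitionOptions` above. The import path, the `useVoiceInput` wrapper, and the `stopSpeaking` TTS call are illustrative assumptions, not code from this file.

```typescript
// Illustrative caller-side wrapper; only the hook's own API is taken from this file.
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'; // assumed path

function useVoiceInput(onUtterance: (text: string) => void, stopSpeaking: () => void) {
  const { startListening, stopListening, isListening, partialTranscript, error } =
    useSpeechRecognition({
      lang: 'en-US',
      interimResults: true,
      onVoiceDetected: () => stopSpeaking(),      // barge-in: cut TTS when the user starts talking
      onResult: (transcript, isFinal) => {
        if (isFinal) onUtterance(transcript);     // hand the final transcript to the caller
      },
      onError: (message) => console.warn('Voice input error:', message),
    });

  return { startListening, stopListening, isListening, partialTranscript, error };
}
```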