/**
 * Speech Recognition Hook
 *
 * Wraps @jamsch/expo-speech-recognition for easy use in components.
 * Provides start/stop controls, recognized text, and status states.
 *
 * NOTE: Gracefully handles a missing native module (Expo Go)
 * - In Expo Go: isAvailable = false, all methods are no-ops
 * - In a dev build: full functionality
 *
 * Usage:
 * ```typescript
 * const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
 *
 * // Start listening (will request permissions if needed)
 * await startListening();
 *
 * // Stop and get the final result
 * stopListening();
 *
 * // recognizedText contains the transcript
 * ```
 */
import { useState, useCallback, useRef, useEffect } from 'react';
import { Platform } from 'react-native';

// Try to import the native module - may fail in Expo Go
let ExpoSpeechRecognitionModule: any = null;
let useSpeechRecognitionEvent: any = () => {}; // no-op by default
try {
  const speechRecognition = require('expo-speech-recognition');
  ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
  useSpeechRecognitionEvent = speechRecognition.useSpeechRecognitionEvent;
} catch (e) {
  console.warn('[SpeechRecognition] Native module not available (Expo Go?). Speech recognition disabled.');
}

export interface UseSpeechRecognitionOptions {
  /** Language for recognition (default: 'en-US') */
  lang?: string;
  /** Whether to return interim results while speaking (default: true) */
  interimResults?: boolean;
  /** Whether to continue listening after a pause (default: false) */
  continuous?: boolean;
  /** Whether to add punctuation (iOS only, default: true) */
  addsPunctuation?: boolean;
  /** Callback when a speech recognition result is available */
  onResult?: (transcript: string, isFinal: boolean) => void;
  /** Callback when an error occurs */
  onError?: (error: string) => void;
  /** Callback when speech recognition starts */
  onStart?: () => void;
  /** Callback when speech recognition ends */
  onEnd?: () => void;
  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
  onVoiceDetected?: () => void;
}

export interface UseSpeechRecognitionReturn {
  /** Start listening for speech */
  startListening: () => Promise<boolean>;
  /** Stop listening and finalize the result */
  stopListening: () => void;
  /** Abort listening without processing */
  abortListening: () => void;
  /** Whether currently listening */
  isListening: boolean;
  /** Whether speech recognition is available on this device */
  isAvailable: boolean;
  /** Current recognized text (updates in real time if interimResults=true) */
  recognizedText: string;
  /** Partial transcript (interim result, not final) */
  partialTranscript: string;
  /** Error message, if any */
  error: string | null;
  /** Clear the recognized text and error */
  reset: () => void;
}

export function useSpeechRecognition(
  options: UseSpeechRecognitionOptions = {}
): UseSpeechRecognitionReturn {
  const {
    lang = 'en-US',
    interimResults = true,
    continuous = false,
    addsPunctuation = true,
    onResult,
    onError,
    onStart,
    onEnd,
    onVoiceDetected,
  } = options;

  const [isListening, setIsListening] = useState(false);
  const [isAvailable, setIsAvailable] = useState(!!ExpoSpeechRecognitionModule);
  const [recognizedText, setRecognizedText] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [error, setError] = useState<string | null>(null);

  // Track if we're in the middle of starting to prevent double-starts
  const isStartingRef = useRef(false);
  // Track if voice has been detected in the current session (for the onVoiceDetected callback)
  const voiceDetectedRef = useRef(false);
  // Track the last partial transcript for the iOS fix (iOS never sends isFinal: true)
  const lastPartialRef = useRef('');

  // Check availability on mount
  useEffect(() => {
    if (!ExpoSpeechRecognitionModule) {
      setIsAvailable(false);
      return;
    }
    const checkAvailability = async () => {
      try {
        // Check if we can get permissions (indirect availability check)
        const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
        // If we can query permissions, the module is available
        setIsAvailable(true);
        console.log('[SpeechRecognition] Available, permission status:', status.status);
      } catch (err) {
        console.warn('[SpeechRecognition] Not available:', err);
        setIsAvailable(false);
      }
    };
    checkAvailability();
  }, []);

  // Event: Recognition started
  useSpeechRecognitionEvent('start', () => {
    console.log('[SpeechRecognition] Started');
    setIsListening(true);
    setError(null);
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset voice detection flag for the new session
    onStart?.();
  });

  // Event: Recognition ended
  useSpeechRecognitionEvent('end', () => {
    console.log('[SpeechRecognition] Ended');
    // iOS FIX: iOS never sends isFinal: true, so send the last partial as final when STT ends
    const lastPartial = lastPartialRef.current;
    if (lastPartial && lastPartial.trim().length > 0) {
      console.log('[SpeechRecognition] 🍎 iOS FIX - Sending last partial as final:', lastPartial);
      setRecognizedText(lastPartial);
      onResult?.(lastPartial, true); // Send as final=true
      lastPartialRef.current = ''; // Clear after sending
    }
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset for the next session
    onEnd?.();
  });

  // Event: Result available
  useSpeechRecognitionEvent('result', (event: any) => {
    const results = event.results;
    if (results && results.length > 0) {
      const result = results[results.length - 1];
      const transcript = result?.transcript || '';
      const isFinal = event.isFinal ?? false;
      console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);

      // Trigger onVoiceDetected on the first result (voice activity detected)
      if (!voiceDetectedRef.current && transcript.length > 0) {
        voiceDetectedRef.current = true;
        console.log('[SpeechRecognition] Voice activity detected');
        onVoiceDetected?.();
      }

      if (isFinal) {
        setRecognizedText(transcript);
        setPartialTranscript('');
        lastPartialRef.current = ''; // Clear after a final result
      } else {
        setPartialTranscript(transcript);
        lastPartialRef.current = transcript; // Save for the iOS fix
      }

      onResult?.(transcript, isFinal);
    }
  });

  // Event: Error occurred
  useSpeechRecognitionEvent('error', (event: any) => {
    const errorCode = event.error || '';
    const errorMessage = event.message || errorCode || 'Speech recognition error';

    // "no-speech" is normal when the user is silent — ignore completely
    if (errorCode === 'no-speech') {
      console.log('[SpeechRecognition] No speech detected (silence) - ignoring');
      setIsListening(false);
      isStartingRef.current = false;
      return;
    }

    console.warn('[SpeechRecognition] Error:', errorMessage);
    setError(errorMessage);
    onError?.(errorMessage);
    setIsListening(false);
    isStartingRef.current = false;
  });

  /**
   * Start listening for speech
   * @returns true if started successfully, false otherwise
   */
  const startListening = useCallback(async (): Promise<boolean> => {
    if (!ExpoSpeechRecognitionModule) {
      console.warn('[SpeechRecognition] Cannot start - native module not available');
      return false;
    }
    if (isListening || isStartingRef.current) {
      console.log('[SpeechRecognition] Already listening or starting');
      return false;
    }
    if (!isAvailable) {
      const msg = 'Speech recognition is not available on this device';
      console.warn('[SpeechRecognition]', msg);
      setError(msg);
      onError?.(msg);
      return false;
    }

    isStartingRef.current = true;
    setError(null);
    setRecognizedText('');
    setPartialTranscript('');

    try {
      // Request permissions
      const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
      if (!permissionResult.granted) {
        const msg = 'Microphone permission denied';
        console.warn('[SpeechRecognition]', msg);
        setError(msg);
        onError?.(msg);
        isStartingRef.current = false;
        return false;
      }

      console.log('[SpeechRecognition] Starting with lang:', lang);

      // Start recognition
      ExpoSpeechRecognitionModule.start({
        lang,
        interimResults,
        continuous,
        addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
        // Android: use CLOUD recognition for better quality.
        // On-device models often have worse accuracy.
        // Setting this to false allows the system to use Google's cloud ASR.
        requiresOnDeviceRecognition: false,
        // Android-specific: longer silence timeouts for more natural pauses.
        // CRITICAL FIX: Increased from 2000ms to 4000ms to prevent premature speech cutoff.
        // This allows users to pause between sentences without being cut off.
        androidIntentOptions: Platform.OS === 'android'
          ? {
              EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 4000, // 4 s of silence before final (was 2000)
              EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 3000, // 3 s pause detection (was 1500)
            }
          : undefined,
      });
      return true;
    } catch (err) {
      const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
      console.warn('[SpeechRecognition] Start error:', msg);
      setError(msg);
      onError?.(msg);
      isStartingRef.current = false;
      return false;
    }
  }, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);

  /**
   * Stop listening and process the final result
   */
  const stopListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;
    if (!isListening && !isStartingRef.current) {
      console.log('[SpeechRecognition] Not listening, nothing to stop');
      return;
    }
    console.log('[SpeechRecognition] Stopping...');
    try {
      ExpoSpeechRecognitionModule.stop();
    } catch (err) {
      console.warn('[SpeechRecognition] Stop error:', err);
    }
  }, [isListening]);

  /**
   * Abort listening without processing
   */
  const abortListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;
    if (!isListening && !isStartingRef.current) {
      return;
    }
    console.log('[SpeechRecognition] Aborting...');
    try {
      ExpoSpeechRecognitionModule.abort();
    } catch (err) {
      console.warn('[SpeechRecognition] Abort error:', err);
    }
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
  }, [isListening]);

  /**
   * Reset state
   */
  const reset = useCallback(() => {
    setRecognizedText('');
    setPartialTranscript('');
    setError(null);
  }, []);

  return {
    startListening,
    stopListening,
    abortListening,
    isListening,
    isAvailable,
    recognizedText,
    partialTranscript,
    error,
    reset,
  };
}
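
/*
 * Example integration (sketch, not part of this module's exports): shows how a
 * component might wire the callbacks for a "barge-in" flow, where the first
 * detected speech interrupts ongoing TTS playback. The component, the
 * `stopSpeaking` helper, and the `sendMessage` handler are hypothetical and only
 * illustrate the intended use of onVoiceDetected / onResult; a real component
 * would live in its own .tsx file.
 *
 * import React from 'react';
 * import { Button, Text, View } from 'react-native';
 * import { useSpeechRecognition } from './useSpeechRecognition';
 *
 * function VoiceInputExample({ stopSpeaking, sendMessage }: {
 *   stopSpeaking: () => void;           // e.g. a TTS stop function
 *   sendMessage: (text: string) => void;
 * }) {
 *   const { startListening, stopListening, isListening, partialTranscript, error } =
 *     useSpeechRecognition({
 *       lang: 'en-US',
 *       interimResults: true,
 *       onVoiceDetected: stopSpeaking,  // interrupt TTS as soon as the user talks
 *       onResult: (transcript, isFinal) => {
 *         if (isFinal && transcript.trim()) sendMessage(transcript);
 *       },
 *     });
 *
 *   return (
 *     <View>
 *       <Text>{isListening ? partialTranscript || 'Listening…' : error ?? ''}</Text>
 *       <Button
 *         title={isListening ? 'Stop' : 'Speak'}
 *         onPress={() => (isListening ? stopListening() : startListening())}
 *       />
 *     </View>
 *   );
 * }
 */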