diff --git a/hooks/useSpeechRecognition.ts b/hooks/useSpeechRecognition.ts
new file mode 100644
index 0000000..987b22a
--- /dev/null
+++ b/hooks/useSpeechRecognition.ts
@@ -0,0 +1,281 @@
+/**
+ * Speech Recognition Hook
+ *
+ * Wraps @jamsch/expo-speech-recognition for easy use in components.
+ * Provides start/stop controls, recognized text, and status states.
+ *
+ * Usage:
+ * ```typescript
+ * const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
+ *
+ * // Start listening (will request permissions if needed)
+ * await startListening();
+ *
+ * // Stop and get final result
+ * stopListening();
+ *
+ * // recognizedText contains the transcript
+ * ```
+ */
+
+import { useState, useCallback, useRef, useEffect } from 'react';
+import {
+  ExpoSpeechRecognitionModule,
+  useSpeechRecognitionEvent,
+} from '@jamsch/expo-speech-recognition';
+import { Platform } from 'react-native';
+
+export interface UseSpeechRecognitionOptions {
+  /** Language for recognition (default: 'en-US') */
+  lang?: string;
+  /** Whether to return interim results while speaking (default: true) */
+  interimResults?: boolean;
+  /** Whether to continue listening after a pause (default: false) */
+  continuous?: boolean;
+  /** Whether to add punctuation (iOS only, default: true) */
+  addsPunctuation?: boolean;
+  /** Callback when a speech recognition result is available */
+  onResult?: (transcript: string, isFinal: boolean) => void;
+  /** Callback when an error occurs */
+  onError?: (error: string) => void;
+  /** Callback when speech recognition starts */
+  onStart?: () => void;
+  /** Callback when speech recognition ends */
+  onEnd?: () => void;
+}
+
+export interface UseSpeechRecognitionReturn {
+  /** Start listening for speech */
+  startListening: () => Promise<boolean>;
+  /** Stop listening and finalize the result */
+  stopListening: () => void;
+  /** Abort listening without processing */
+  abortListening: () => void;
+  /** Whether currently listening */
+  isListening: boolean;
+  /** Whether speech recognition is available on this device */
+  isAvailable: boolean;
+  /** Current recognized text (updates in real time if interimResults=true) */
+  recognizedText: string;
+  /** Partial transcript (interim result, not final) */
+  partialTranscript: string;
+  /** Error message, if any */
+  error: string | null;
+  /** Clear the recognized text and error */
+  reset: () => void;
+}
+
+export function useSpeechRecognition(
+  options: UseSpeechRecognitionOptions = {}
+): UseSpeechRecognitionReturn {
+  const {
+    lang = 'en-US',
+    interimResults = true,
+    continuous = false,
+    addsPunctuation = true,
+    onResult,
+    onError,
+    onStart,
+    onEnd,
+  } = options;
+
+  const [isListening, setIsListening] = useState(false);
+  const [isAvailable, setIsAvailable] = useState(true);
+  const [recognizedText, setRecognizedText] = useState('');
+  const [partialTranscript, setPartialTranscript] = useState('');
+  const [error, setError] = useState<string | null>(null);
+
+  // Track if we're in the middle of starting to prevent double-starts
+  const isStartingRef = useRef(false);
+
+  // Check availability on mount
+  useEffect(() => {
+    const checkAvailability = async () => {
+      try {
+        // Check if we can get permissions (indirect availability check)
+        const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
+        // If we can query permissions, the module is available
+        setIsAvailable(true);
+        console.log('[SpeechRecognition] Available, permission status:', status.status);
+      } catch (err) {
+        console.error('[SpeechRecognition] Not available:', err);
+        setIsAvailable(false);
+      }
+    };
+    checkAvailability();
+  }, []);
+
+  // Event: Recognition started
+  useSpeechRecognitionEvent('start', () => {
+    console.log('[SpeechRecognition] Started');
+    setIsListening(true);
+    setError(null);
+    isStartingRef.current = false;
+    onStart?.();
+  });
+
+  // Event: Recognition ended
+  useSpeechRecognitionEvent('end', () => {
+    console.log('[SpeechRecognition] Ended');
+    setIsListening(false);
+    setPartialTranscript('');
+    isStartingRef.current = false;
+    onEnd?.();
+  });
+
+  // Event: Result available
+  useSpeechRecognitionEvent('result', (event) => {
+    const results = event.results;
+    if (results && results.length > 0) {
+      const result = results[results.length - 1];
+      const transcript = result?.transcript || '';
+      const isFinal = event.isFinal ?? false;
+
+      console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);
+
+      if (isFinal) {
+        setRecognizedText(transcript);
+        setPartialTranscript('');
+      } else {
+        setPartialTranscript(transcript);
+      }
+
+      onResult?.(transcript, isFinal);
+    }
+  });
+
+  // Event: Error occurred
+  useSpeechRecognitionEvent('error', (event) => {
+    const errorMessage = event.message || event.error || 'Speech recognition error';
+    console.error('[SpeechRecognition] Error:', errorMessage);
+
+    // Don't set error for "no-speech" - this is normal when the user doesn't say anything
+    if (event.error !== 'no-speech') {
+      setError(errorMessage);
+      onError?.(errorMessage);
+    }
+
+    setIsListening(false);
+    isStartingRef.current = false;
+  });
+
+  /**
+   * Start listening for speech
+   * @returns true if started successfully, false otherwise
+   */
+  const startListening = useCallback(async (): Promise<boolean> => {
+    if (isListening || isStartingRef.current) {
+      console.log('[SpeechRecognition] Already listening or starting');
+      return false;
+    }
+
+    if (!isAvailable) {
+      const msg = 'Speech recognition is not available on this device';
+      console.error('[SpeechRecognition]', msg);
+      setError(msg);
+      onError?.(msg);
+      return false;
+    }
+
+    isStartingRef.current = true;
+    setError(null);
+    setRecognizedText('');
+    setPartialTranscript('');
+
+    try {
+      // Request permissions
+      const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
+
+      if (!permissionResult.granted) {
+        const msg = 'Microphone permission denied';
+        console.error('[SpeechRecognition]', msg);
+        setError(msg);
+        onError?.(msg);
+        isStartingRef.current = false;
+        return false;
+      }
+
+      console.log('[SpeechRecognition] Starting with lang:', lang);
+
+      // Start recognition
+      ExpoSpeechRecognitionModule.start({
+        lang,
+        interimResults,
+        continuous,
+        addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
+        // Android-specific: longer silence timeouts for more natural pauses
+        androidIntentOptions: Platform.OS === 'android' ? {
+          EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
+          EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
+        } : undefined,
+      });
+
+      return true;
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
+      console.error('[SpeechRecognition] Start error:', msg);
+      setError(msg);
+      onError?.(msg);
+      isStartingRef.current = false;
+      return false;
+    }
+  }, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);
+
+  /**
+   * Stop listening and process the final result
+   */
+  const stopListening = useCallback(() => {
+    if (!isListening && !isStartingRef.current) {
+      console.log('[SpeechRecognition] Not listening, nothing to stop');
+      return;
+    }
+
+    console.log('[SpeechRecognition] Stopping...');
+    try {
+      ExpoSpeechRecognitionModule.stop();
+    } catch (err) {
+      console.warn('[SpeechRecognition] Stop error:', err);
+    }
+  }, [isListening]);
+
+  /**
+   * Abort listening without processing
+   */
+  const abortListening = useCallback(() => {
+    if (!isListening && !isStartingRef.current) {
+      return;
+    }
+
+    console.log('[SpeechRecognition] Aborting...');
+    try {
+      ExpoSpeechRecognitionModule.abort();
+    } catch (err) {
+      console.warn('[SpeechRecognition] Abort error:', err);
+    }
+
+    setIsListening(false);
+    setPartialTranscript('');
+    isStartingRef.current = false;
+  }, [isListening]);
+
+  /**
+   * Reset state
+   */
+  const reset = useCallback(() => {
+    setRecognizedText('');
+    setPartialTranscript('');
+    setError(null);
+  }, []);
+
+  return {
+    startListening,
+    stopListening,
+    abortListening,
+    isListening,
+    isAvailable,
+    recognizedText,
+    partialTranscript,
+    error,
+    reset,
+  };
+}
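For reference, here is how a component might consume the hook. This is a minimal sketch and not part of the diff: the `VoiceInput` component, its `onTranscript` prop, and the relative import path are illustrative assumptions.

```tsx
// Minimal consumer sketch. The VoiceInput component, its onTranscript prop,
// and the import path are illustrative assumptions, not part of this diff.
import React from 'react';
import { Button, Text, View } from 'react-native';
import { useSpeechRecognition } from '../hooks/useSpeechRecognition';

export function VoiceInput({ onTranscript }: { onTranscript: (text: string) => void }) {
  const {
    startListening,
    stopListening,
    isListening,
    recognizedText,
    partialTranscript,
    error,
  } = useSpeechRecognition({
    lang: 'en-US',
    // Forward only final transcripts to the parent
    onResult: (transcript, isFinal) => {
      if (isFinal) onTranscript(transcript);
    },
  });

  return (
    <View>
      <Button
        title={isListening ? 'Stop' : 'Speak'}
        onPress={() => {
          if (isListening) {
            stopListening();
          } else {
            void startListening(); // fire-and-forget; failures surface via `error`
          }
        }}
      />
      {/* Interim text while listening, final text afterwards */}
      <Text>{isListening ? partialTranscript : recognizedText}</Text>
      {error && <Text>{error}</Text>}
    </View>
  );
}
```

Showing `partialTranscript` while listening and `recognizedText` afterwards matches the hook's default `interimResults: true` behavior, where interim and final results arrive through separate state fields.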