- FAB button now correctly stops session during speaking/processing states
- Echo prevention: STT stopped during TTS playback, results ignored during speaking (see the sketch below this list)
- Chat TTS only speaks when voice session is active (no auto-speak for text chat)
- Session stop now aborts in-flight API requests and prevents race conditions
- STT restarts after TTS with 800ms delay for audio focus release
- Pending interrupt transcript processed after TTS completion
- ChatContext added for message persistence across tab navigation
- VoiceFAB redesigned with state-based animations
- console.error replaced with console.warn across voice pipeline
- no-speech STT errors silenced (normal silence behavior)
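A minimal sketch of the echo-prevention flow described above, assuming an expo-speech TTS pipeline. The `useVoiceTurn` hook, `STT_RESTART_DELAY_MS`, and `onFinalTranscript` wiring are illustrative only, not the app's actual voice-session code:

```typescript
// Illustrative only: a simplified coordinator, not the app's real useVoiceSession logic.
import { useRef } from 'react';
import * as Speech from 'expo-speech';
import { useSpeechRecognition } from './useSpeechRecognition';

const STT_RESTART_DELAY_MS = 800; // give the OS time to release audio focus after TTS

export function useVoiceTurn(onFinalTranscript: (text: string) => void) {
  const isSpeakingRef = useRef(false);

  const { startListening, stopListening } = useSpeechRecognition({
    onResult: (transcript, isFinal) => {
      // Echo prevention: ignore STT results that arrive while TTS is playing
      if (isSpeakingRef.current) return;
      if (isFinal) onFinalTranscript(transcript);
    },
  });

  const speak = (text: string) => {
    isSpeakingRef.current = true;
    stopListening(); // stop STT so the mic doesn't pick up our own TTS audio
    Speech.speak(text, {
      onDone: () => {
        isSpeakingRef.current = false;
        // Restart STT only after a short delay so audio focus is released
        setTimeout(() => { void startListening(); }, STT_RESTART_DELAY_MS);
      },
    });
  };

  return { speak, startListening, stopListening };
}
```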
/**
 * Speech Recognition Hook
 *
 * Wraps @jamsch/expo-speech-recognition for easy use in components.
 * Provides start/stop controls, recognized text, and status states.
 *
 * NOTE: Gracefully handles missing native module (Expo Go)
 * - In Expo Go: isAvailable = false, all methods are no-ops
 * - In Dev Build: Full functionality
 *
 * Usage:
 * ```typescript
 * const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
 *
 * // Start listening (will request permissions if needed)
 * await startListening();
 *
 * // Stop and get final result
 * stopListening();
 *
 * // recognizedText contains the transcript
 * ```
 */

import { useState, useCallback, useRef, useEffect } from 'react';
import { Platform } from 'react-native';

// Try to import the native module - may fail in Expo Go
let ExpoSpeechRecognitionModule: any = null;
let useSpeechRecognitionEvent: any = () => {}; // no-op by default

try {
  const speechRecognition = require('@jamsch/expo-speech-recognition');
  ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
  useSpeechRecognitionEvent = speechRecognition.useSpeechRecognitionEvent;
} catch (e) {
  console.warn('[SpeechRecognition] Native module not available (Expo Go?). Speech recognition disabled.');
}

export interface UseSpeechRecognitionOptions {
  /** Language for recognition (default: 'en-US') */
  lang?: string;
  /** Whether to return interim results while speaking (default: true) */
  interimResults?: boolean;
  /** Whether to continue listening after pause (default: false) */
  continuous?: boolean;
  /** Whether to add punctuation (iOS only, default: true) */
  addsPunctuation?: boolean;
  /** Callback when speech recognition result is available */
  onResult?: (transcript: string, isFinal: boolean) => void;
  /** Callback when an error occurs */
  onError?: (error: string) => void;
  /** Callback when speech recognition starts */
  onStart?: () => void;
  /** Callback when speech recognition ends */
  onEnd?: () => void;
  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
  onVoiceDetected?: () => void;
}
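
// Illustrative callback wiring (a sketch, not part of this module's API surface):
// `Speech` (expo-speech), `handleUserUtterance`, and `showToast` are assumed to
// exist elsewhere; the barge-in behaviour matches the onVoiceDetected docs above.
//
//   import * as Speech from 'expo-speech';
//
//   const { startListening, isAvailable } = useSpeechRecognition({
//     onVoiceDetected: () => Speech.stop(), // barge-in: user spoke, cut TTS short
//     onResult: (text, isFinal) => { if (isFinal) handleUserUtterance(text); },
//     onError: (msg) => showToast(msg),
//   });
//
//   if (isAvailable) {
//     await startListening(); // requests mic permission on first use
//   } else {
//     // Expo Go / unsupported device: fall back to text input
//   }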

export interface UseSpeechRecognitionReturn {
  /** Start listening for speech */
  startListening: () => Promise<boolean>;
  /** Stop listening and finalize result */
  stopListening: () => void;
  /** Abort listening without processing */
  abortListening: () => void;
  /** Whether currently listening */
  isListening: boolean;
  /** Whether speech recognition is available on this device */
  isAvailable: boolean;
  /** Current recognized text (updates in real-time if interimResults=true) */
  recognizedText: string;
  /** Partial transcript (interim result, not final) */
  partialTranscript: string;
  /** Error message if any */
  error: string | null;
  /** Clear the recognized text and error */
  reset: () => void;
}

export function useSpeechRecognition(
  options: UseSpeechRecognitionOptions = {}
): UseSpeechRecognitionReturn {
  const {
    lang = 'en-US',
    interimResults = true,
    continuous = false,
    addsPunctuation = true,
    onResult,
    onError,
    onStart,
    onEnd,
    onVoiceDetected,
  } = options;

  const [isListening, setIsListening] = useState(false);
  const [isAvailable, setIsAvailable] = useState(!!ExpoSpeechRecognitionModule);
  const [recognizedText, setRecognizedText] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [error, setError] = useState<string | null>(null);

  // Track if we're in the middle of starting to prevent double-starts
  const isStartingRef = useRef(false);
  // Track if voice has been detected in current session (for onVoiceDetected callback)
  const voiceDetectedRef = useRef(false);

  // Check availability on mount
  useEffect(() => {
    if (!ExpoSpeechRecognitionModule) {
      setIsAvailable(false);
      return;
    }

    const checkAvailability = async () => {
      try {
        // Check if we can get permissions (indirect availability check)
        const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
        // If we can query permissions, the module is available
        setIsAvailable(true);
        console.log('[SpeechRecognition] Available, permission status:', status.status);
      } catch (err) {
        console.warn('[SpeechRecognition] Not available:', err);
        setIsAvailable(false);
      }
    };
    checkAvailability();
  }, []);

  // Event: Recognition started
  useSpeechRecognitionEvent('start', () => {
    console.log('[SpeechRecognition] Started');
    setIsListening(true);
    setError(null);
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset voice detection flag for new session
    onStart?.();
  });

  // Event: Recognition ended
  useSpeechRecognitionEvent('end', () => {
    console.log('[SpeechRecognition] Ended');
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset for next session
    onEnd?.();
  });

  // Event: Result available
  useSpeechRecognitionEvent('result', (event: any) => {
    const results = event.results;
    if (results && results.length > 0) {
      const result = results[results.length - 1];
      const transcript = result?.transcript || '';
      const isFinal = event.isFinal ?? false;

      console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);

      // Trigger onVoiceDetected on first result (voice activity detected)
      if (!voiceDetectedRef.current && transcript.length > 0) {
        voiceDetectedRef.current = true;
        console.log('[SpeechRecognition] Voice activity detected');
        onVoiceDetected?.();
      }

      if (isFinal) {
        setRecognizedText(transcript);
        setPartialTranscript('');
      } else {
        setPartialTranscript(transcript);
      }

      onResult?.(transcript, isFinal);
    }
  });

  // Event: Error occurred
  useSpeechRecognitionEvent('error', (event: any) => {
    const errorCode = event.error || '';
    const errorMessage = event.message || errorCode || 'Speech recognition error';

    // "no-speech" is normal when user is silent — ignore completely
    if (errorCode === 'no-speech') {
      console.log('[SpeechRecognition] No speech detected (silence) - ignoring');
      setIsListening(false);
      isStartingRef.current = false;
      return;
    }

    console.warn('[SpeechRecognition] Error:', errorMessage);
    setError(errorMessage);
    onError?.(errorMessage);
    setIsListening(false);
    isStartingRef.current = false;
  });

  /**
   * Start listening for speech
   * @returns true if started successfully, false otherwise
   */
  const startListening = useCallback(async (): Promise<boolean> => {
    if (!ExpoSpeechRecognitionModule) {
      console.warn('[SpeechRecognition] Cannot start - native module not available');
      return false;
    }

    if (isListening || isStartingRef.current) {
      console.log('[SpeechRecognition] Already listening or starting');
      return false;
    }

    if (!isAvailable) {
      const msg = 'Speech recognition is not available on this device';
      console.warn('[SpeechRecognition]', msg);
      setError(msg);
      onError?.(msg);
      return false;
    }

    isStartingRef.current = true;
    setError(null);
    setRecognizedText('');
    setPartialTranscript('');

    try {
      // Request permissions
      const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();

      if (!permissionResult.granted) {
        const msg = 'Microphone permission denied';
        console.warn('[SpeechRecognition]', msg);
        setError(msg);
        onError?.(msg);
        isStartingRef.current = false;
        return false;
      }

      console.log('[SpeechRecognition] Starting with lang:', lang);

      // Start recognition
      ExpoSpeechRecognitionModule.start({
        lang,
        interimResults,
        continuous,
        addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
        // Android-specific: longer silence timeout for more natural pauses
        androidIntentOptions: Platform.OS === 'android' ? {
          EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
          EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
        } : undefined,
      });

      return true;
    } catch (err) {
      const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
      console.warn('[SpeechRecognition] Start error:', msg);
      setError(msg);
      onError?.(msg);
      isStartingRef.current = false;
      return false;
    }
  }, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);

  /**
   * Stop listening and process final result
   */
  const stopListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;

    if (!isListening && !isStartingRef.current) {
      console.log('[SpeechRecognition] Not listening, nothing to stop');
      return;
    }

    console.log('[SpeechRecognition] Stopping...');
    try {
      ExpoSpeechRecognitionModule.stop();
    } catch (err) {
      console.warn('[SpeechRecognition] Stop error:', err);
    }
  }, [isListening]);

  /**
   * Abort listening without processing
   */
  const abortListening = useCallback(() => {
    if (!ExpoSpeechRecognitionModule) return;

    if (!isListening && !isStartingRef.current) {
      return;
    }

    console.log('[SpeechRecognition] Aborting...');
    try {
      ExpoSpeechRecognitionModule.abort();
    } catch (err) {
      console.warn('[SpeechRecognition] Abort error:', err);
    }

    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
  }, [isListening]);

  /**
   * Reset state
   */
  const reset = useCallback(() => {
    setRecognizedText('');
    setPartialTranscript('');
    setError(null);
  }, []);

  return {
    startListening,
    stopListening,
    abortListening,
    isListening,
    isAvailable,
    recognizedText,
    partialTranscript,
    error,
    reset,
  };
}