Core TTS infrastructure:
- sherpaTTS.ts: Sherpa ONNX integration for offline TTS
- TTSErrorBoundary.tsx: Error boundary for TTS failures
- ErrorBoundary.tsx: Generic error boundary component
- VoiceIndicator.tsx: Visual indicator for voice activity
- useSpeechRecognition.ts: Speech-to-text hook
- DebugLogger.ts: Debug logging utility

Features:
- Offline voice synthesis (no internet needed)
- Multiple voices support
- Real-time voice activity indication
- Error recovery and fallback
- Debug logging for troubleshooting

Tech stack:
- Sherpa ONNX runtime
- React Native Audio
- Expo modules
143 lines · 5.3 KiB · TypeScript
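A minimal sketch of how the speech-to-text hook might be consumed; the PushToTalk component and the '@/hooks/useSpeechRecognition' import path are illustrative assumptions, not part of this commit.

import React from 'react';
import { Button, Text, View } from 'react-native';
// Assumed import path; adjust to wherever the hook lives in this repo.
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';

// Hypothetical push-to-talk button that mirrors the live transcript.
export function PushToTalk() {
  const {
    isListening,
    recognizedText,
    isAvailable,
    hasPermission,
    requestPermission,
    startListening,
    stopListening,
  } = useSpeechRecognition();

  const onPress = async () => {
    if (!isAvailable) return;                      // native module not installed
    if (!hasPermission && !(await requestPermission())) return;
    if (isListening) {
      stopListening();
    } else {
      await startListening({ continuous: false }); // single utterance
    }
  };

  return (
    <View>
      <Button title={isListening ? 'Stop' : 'Speak'} onPress={onPress} />
      <Text>{recognizedText}</Text>
    </View>
  );
}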
import { useState, useEffect, useRef } from 'react';
import { Alert } from 'react-native';
import { debugLogger } from '@/services/DebugLogger';

// Try to import native module
let ExpoSpeechRecognitionModule: any = null;
let SPEECH_RECOGNITION_AVAILABLE = false;
try {
  const speechRecognition = require('expo-speech-recognition');
  ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
  if (ExpoSpeechRecognitionModule) {
    SPEECH_RECOGNITION_AVAILABLE = true;
    debugLogger.info('STT', 'Speech recognition module loaded successfully');
  }
} catch (e) {
  debugLogger.warn('STT', 'Speech recognition not available', e);
  console.log('[useSpeechRecognition] expo-speech-recognition not available');
}

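/** One recognition update: the transcript so far and whether it is final. */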
export interface SpeechRecognitionResult {
  transcript: string;
  isFinal: boolean;
}

export interface UseSpeechRecognitionReturn {
  isListening: boolean;
  recognizedText: string;
  startListening: (options?: { continuous?: boolean }) => Promise<void>;
  stopListening: () => void;
  isAvailable: boolean;
  hasPermission: boolean;
  requestPermission: () => Promise<boolean>;
}

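/**
 * React hook wrapping expo-speech-recognition. Wires up the module's
 * start/end/result/error events, tracks listening state and the latest
 * transcript, and exposes permission handling. Every entry point checks
 * availability first and degrades gracefully when the native module is
 * missing.
 */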
export function useSpeechRecognition(): UseSpeechRecognitionReturn {
  const [isListening, setIsListening] = useState(false);
  const [recognizedText, setRecognizedText] = useState('');
  const [hasPermission, setHasPermission] = useState(false);

  // Callbacks
  const onResultRef = useRef<((result: SpeechRecognitionResult) => void) | null>(null);

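  // Subscribe to the native recognition events once on mount; the empty
  // dependency array means the listeners live for the lifetime of the component.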
  useEffect(() => {
    if (!SPEECH_RECOGNITION_AVAILABLE || !ExpoSpeechRecognitionModule) {
      debugLogger.warn('STT', 'Cannot setup listeners - module not available');
      return;
    }

    debugLogger.info('STT', 'Setting up speech recognition event listeners');
    const subscriptions: any[] = [];

    if (ExpoSpeechRecognitionModule.addListener) {
      subscriptions.push(
        ExpoSpeechRecognitionModule.addListener('start', () => {
          debugLogger.info('STT', 'Speech recognition started');
          setIsListening(true);
        })
      );
      subscriptions.push(
        ExpoSpeechRecognitionModule.addListener('end', () => {
          debugLogger.info('STT', 'Speech recognition ended');
          setIsListening(false);
        })
      );
      subscriptions.push(
        ExpoSpeechRecognitionModule.addListener('result', (event: any) => {
          const transcript = event.results?.[0]?.transcript || '';
          // Depending on the expo-speech-recognition version, isFinal is
          // reported on the event itself or on the individual result; accept
          // either shape.
          const isFinal = event.isFinal ?? event.results?.[0]?.isFinal ?? false;
          debugLogger.log('STT', `Recognized: "${transcript}" (${isFinal ? 'FINAL' : 'interim'})`);
          setRecognizedText(transcript);
        })
      );
      subscriptions.push(
        ExpoSpeechRecognitionModule.addListener('error', (event: any) => {
          debugLogger.error('STT', 'Speech recognition error', event);
          setIsListening(false);
          console.warn('[Speech] Error:', event);
        })
      );
    }

    return () => {
      debugLogger.info('STT', 'Cleaning up speech recognition listeners');
      subscriptions.forEach(sub => sub.remove?.());
    };
  }, []);

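  // Request microphone / speech-recognition permission and cache the outcome.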
  const requestPermission = async () => {
    if (!SPEECH_RECOGNITION_AVAILABLE) {
      debugLogger.warn('STT', 'Cannot request permission - module not available');
      return false;
    }
    debugLogger.info('STT', 'Requesting microphone permissions');
    const result = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
    setHasPermission(result.granted);
    debugLogger.log('STT', `Permission ${result.granted ? 'granted' : 'denied'}`);
    return result.granted;
  };

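  // Kick off a recognition session. isListening is not set here; it is
  // flipped by the native 'start' event registered above.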
  const startListening = async (options?: { continuous?: boolean }) => {
    if (!SPEECH_RECOGNITION_AVAILABLE) {
      debugLogger.error('STT', 'Cannot start - speech recognition not available');
      Alert.alert('Not Available', 'Voice recognition is not available on this device.');
      return;
    }

    try {
      // Reset text
      setRecognizedText('');
      debugLogger.info('STT', `Starting speech recognition (continuous: ${options?.continuous ?? false})`);

      await ExpoSpeechRecognitionModule.start({
        lang: 'en-US',
        interimResults: true,
        maxAlternatives: 1,
        continuous: options?.continuous ?? false,
      });
    } catch (e) {
      debugLogger.error('STT', 'Failed to start listening', e);
      console.error('Failed to start listening', e);
      setIsListening(false);
    }
  };

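  // Stop recognition and reflect the idle state immediately rather than
  // waiting for the native 'end' event.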
  const stopListening = () => {
    debugLogger.info('STT', 'Stopping speech recognition');
    if (SPEECH_RECOGNITION_AVAILABLE) {
      ExpoSpeechRecognitionModule.stop();
    }
    setIsListening(false);
  };

  return {
    isListening,
    recognizedText,
    startListening,
    stopListening,
    isAvailable: SPEECH_RECOGNITION_AVAILABLE,
    hasPermission,
    requestPermission
  };
}