Update main project + add WellNuoLite
- WellNuoLite: lightweight build for Apple App Review
- Updated chat and voice tabs
- Added TTS models and services
- Updated dependencies
This commit is contained in:
parent c80fd4ab4b
commit b740762609
1 WellNuoLite Submodule
@@ -0,0 +1 @@
Subproject commit 5e550f0f2b88ecd308eb23759491ced251421e15
4 app.json
@@ -12,6 +12,7 @@
      "supportsTablet": true,
      "bundleIdentifier": "com.wellnuo.BluetoothScanner",
      "appleTeamId": "UHLZD54ULZ",
      "deploymentTarget": "16.0",
      "infoPlist": {
        "ITSAppUsesNonExemptEncryption": false
      }
@@ -46,7 +47,8 @@
        }
      ],
      "expo-speech-recognition",
      "expo-audio"
      "expo-audio",
      "./plugins/withTTSModels.js"
    ],
    "experiments": {
      "typedRoutes": true,

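The plugins array now loads ./plugins/withTTSModels.js, a local Expo config plugin whose source is not shown in this diff. A minimal sketch of what such a plugin could look like, assuming it only needs to copy the bundled Piper model files into the generated iOS project (the copy destination and plugin body are assumptions, not code from this commit):

    // plugins/withTTSModels.js — hypothetical sketch, not the committed source.
    const fs = require('fs');
    const path = require('path');
    const { withDangerousMod } = require('@expo/config-plugins');

    module.exports = function withTTSModels(config) {
      return withDangerousMod(config, [
        'ios',
        async (cfg) => {
          // Copy the bundled TTS models into the native iOS project folder.
          const src = path.join(cfg.modRequest.projectRoot, 'assets', 'tts-models');
          const dest = path.join(cfg.modRequest.platformProjectRoot, 'tts-models');
          fs.cpSync(src, dest, { recursive: true });
          return cfg;
        },
      ]);
    };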
@@ -78,7 +78,7 @@ export default function TabLayout() {
          href: null,
        }}
      />
      {/* Hide voice tab - replaced by chat */}
      {/* Voice assistant - hidden, integrated into Chat */}
      <Tabs.Screen
        name="voice"
        options={{

@@ -1,4 +1,4 @@
import React, { useState, useCallback, useRef } from 'react';
import React, { useState, useCallback, useRef, useEffect } from 'react';
import {
  View,
  Text,
@@ -9,13 +9,30 @@ import {
  KeyboardAvoidingView,
  Platform,
  Alert,
  ActivityIndicator,
  Modal,
  ScrollView,
} from 'react-native';
import { Ionicons } from '@expo/vector-icons';
import { SafeAreaView } from 'react-native-safe-area-context';
import { useFocusEffect } from 'expo-router';
import { api } from '@/services/api';
import { useBeneficiary } from '@/contexts/BeneficiaryContext';
import { AppColors, BorderRadius, FontSizes, Spacing } from '@/constants/theme';
import type { Message } from '@/types';
import { useTTS } from '@/hooks/useTTS';
import { AVAILABLE_VOICES, getCurrentVoice, setVoice, type PiperVoice } from '@/services/sherpaTTS';

// Try to import speech recognition if available
let ExpoSpeechRecognitionModule: any = null;
let useSpeechRecognitionEvent: any = null;
try {
  const speechRecognition = require('expo-speech-recognition');
  ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
  useSpeechRecognitionEvent = speechRecognition.useSpeechRecognitionEvent;
} catch (e) {
  console.log('expo-speech-recognition not available');
}

export default function ChatScreen() {
  const { currentBeneficiary, getBeneficiaryContext } = useBeneficiary();
@@ -29,10 +46,136 @@ export default function ChatScreen() {
  ]);
  const [input, setInput] = useState('');
  const [isSending, setIsSending] = useState(false);
  const [isListening, setIsListening] = useState(false);
  const [recognizedText, setRecognizedText] = useState('');
  const [showVoicePicker, setShowVoicePicker] = useState(false);
  const [selectedVoice, setSelectedVoice] = useState<PiperVoice>(getCurrentVoice());
  const [isChangingVoice, setIsChangingVoice] = useState(false);
  const [voiceModeEnabled, setVoiceModeEnabled] = useState(false); // Voice Mode toggle
  const flatListRef = useRef<FlatList>(null);
  const lastSendTimeRef = useRef<number>(0);
  const SEND_COOLDOWN_MS = 1000; // 1 second cooldown between messages

  // TTS hook for speaking responses
  const { speak, stop, isSpeaking } = useTTS();

  // Stop TTS and mic when navigating away from screen
  useFocusEffect(
    useCallback(() => {
      // Screen focused
      return () => {
        // Screen unfocused - cleanup
        stop(); // Stop any playing TTS
        if (ExpoSpeechRecognitionModule && isListening) {
          ExpoSpeechRecognitionModule.stop();
          setIsListening(false);
        }
        setVoiceModeEnabled(false); // Disable voice mode on leave
      };
    }, [stop, isListening])
  );

  // Handle voice change
  const handleVoiceChange = useCallback(async (voice: PiperVoice) => {
    if (voice.id === selectedVoice.id) {
      setShowVoicePicker(false);
      return;
    }

    setIsChangingVoice(true);
    try {
      const success = await setVoice(voice.id);
      if (success) {
        setSelectedVoice(voice);
        // Test the new voice
        speak(`Hello, I'm ${voice.name}. How can I help you?`);
      } else {
        Alert.alert('Error', `Failed to switch to ${voice.name} voice.`);
      }
    } catch (error) {
      Alert.alert('Error', 'Failed to change voice.');
    } finally {
      setIsChangingVoice(false);
      setShowVoicePicker(false);
    }
  }, [selectedVoice, speak]);

  // Speech recognition events (if available)
  useEffect(() => {
    if (!useSpeechRecognitionEvent) return;

    // Handle recognized speech result
    const resultSubscription = useSpeechRecognitionEvent('result', (event: any) => {
      const transcript = event.results?.[0]?.transcript || '';
      setRecognizedText(transcript);
      if (event.isFinal) {
        setInput(transcript);
        setIsListening(false);
      }
    });

    // Handle errors
    const errorSubscription = useSpeechRecognitionEvent('error', (event: any) => {
      console.log('Speech recognition error:', event.error);
      setIsListening(false);
    });

    // Handle end
    const endSubscription = useSpeechRecognitionEvent('end', () => {
      setIsListening(false);
    });

    return () => {
      resultSubscription?.remove?.();
      errorSubscription?.remove?.();
      endSubscription?.remove?.();
    };
  }, []);

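Note: in expo-speech-recognition's documented API, useSpeechRecognitionEvent is a React hook meant to be called at the top level of a component, where it manages the native subscription itself; the effect above instead calls it inside useEffect and treats its return value as a subscription. A sketch of the conventional pattern, with names borrowed from the diff (the wrapper hook below is hypothetical):

    // Hypothetical top-level usage sketch; not part of this commit.
    import { useSpeechRecognitionEvent } from 'expo-speech-recognition';

    function useSpeechEvents(onFinal: (text: string) => void) {
      useSpeechRecognitionEvent('result', (event: any) => {
        const transcript = event.results?.[0]?.transcript ?? '';
        if (event.isFinal) onFinal(transcript); // final transcript only
      });
      useSpeechRecognitionEvent('end', () => {
        // recognition session ended; no manual .remove() needed
      });
    }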
  // Start voice input
  const startListening = useCallback(async () => {
    if (!ExpoSpeechRecognitionModule) {
      Alert.alert('Not Available', 'Voice input is not available on this device.');
      return;
    }

    // PREVENT SELF-RECORDING: Don't start mic while TTS is speaking
    if (isSpeaking) {
      console.log('[Voice] Blocked: TTS is still speaking');
      return;
    }

    try {
      const result = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
      if (!result.granted) {
        Alert.alert('Permission Denied', 'Please enable microphone access to use voice input.');
        return;
      }

      // Enable voice mode when user starts listening
      setVoiceModeEnabled(true);
      setIsListening(true);
      setRecognizedText('');
      ExpoSpeechRecognitionModule.start({
        lang: 'en-US',
        interimResults: true,
        maxAlternatives: 1,
      });
    } catch (error) {
      console.error('Failed to start speech recognition:', error);
      setIsListening(false);
      Alert.alert('Error', 'Failed to start voice input.');
    }
  }, [isSpeaking]);

  // Stop voice input
  const stopListening = useCallback(() => {
    if (ExpoSpeechRecognitionModule) {
      ExpoSpeechRecognitionModule.stop();
    }
    setIsListening(false);
  }, []);

  const handleSend = useCallback(async () => {
    const trimmedInput = input.trim();
    if (!trimmedInput || isSending) return;
@@ -77,13 +220,16 @@ export default function ChatScreen() {
      const response = await api.sendMessage(questionWithContext, deploymentId);

      if (response.ok && response.data?.response) {
        const responseText = response.data.response.body;
        const assistantMessage: Message = {
          id: (Date.now() + 1).toString(),
          role: 'assistant',
          content: response.data.response.body,
          content: responseText,
          timestamp: new Date(),
        };
        setMessages((prev) => [...prev, assistantMessage]);
        // Speak the response using neural TTS
        speak(responseText);
      } else {
        const errorMessage: Message = {
          id: (Date.now() + 1).toString(),
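The hunk above elides the body between the isSending guard and the api.sendMessage call, so the use of SEND_COOLDOWN_MS and lastSendTimeRef (declared earlier in this diff) is not visible. A hypothetical reconstruction of what that rate-limit check presumably looks like:

    // Hypothetical sketch of the elided cooldown guard inside handleSend.
    const now = Date.now();
    if (now - lastSendTimeRef.current < SEND_COOLDOWN_MS) {
      return; // drop sends fired within the 1-second cooldown window
    }
    lastSendTimeRef.current = now;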
@@ -162,12 +308,38 @@ export default function ChatScreen() {
            </Text>
          </View>
        </View>
        <TouchableOpacity
          style={styles.headerButton}
          onPress={() => Alert.alert('Coming Soon', 'Chat settings will be available in a future update.')}
        >
          <Ionicons name="ellipsis-vertical" size={24} color={AppColors.textPrimary} />
        </TouchableOpacity>
        <View style={styles.headerButtons}>
          {/* Voice Mode Toggle */}
          {voiceModeEnabled && (
            <TouchableOpacity
              style={[styles.headerButton, styles.voiceModeActive]}
              onPress={() => {
                stop();
                stopListening();
                setVoiceModeEnabled(false);
              }}
            >
              <Ionicons name="mic" size={20} color={AppColors.white} />
              <Text style={styles.voiceModeText}>ON</Text>
            </TouchableOpacity>
          )}
          {/* TTS Speaking indicator */}
          {isSpeaking && (
            <TouchableOpacity
              style={[styles.headerButton, styles.speakingIndicator]}
              onPress={stop}
            >
              <Ionicons name="volume-high" size={20} color={AppColors.white} />
            </TouchableOpacity>
          )}
          {/* Voice Picker */}
          <TouchableOpacity
            style={styles.headerButton}
            onPress={() => setShowVoicePicker(true)}
          >
            <Ionicons name="volume-high-outline" size={24} color={AppColors.textPrimary} />
          </TouchableOpacity>
        </View>
      </View>

      {/* Messages */}
@@ -186,8 +358,34 @@ export default function ChatScreen() {
        onContentSizeChange={() => flatListRef.current?.scrollToEnd({ animated: true })}
      />

      {/* Listening indicator */}
      {isListening && (
        <View style={styles.listeningIndicator}>
          <ActivityIndicator color={AppColors.primary} size="small" />
          <Text style={styles.listeningText}>
            {recognizedText || 'Listening...'}
          </Text>
          <TouchableOpacity onPress={stopListening} style={styles.stopButton}>
            <Ionicons name="close-circle" size={24} color={AppColors.error} />
          </TouchableOpacity>
        </View>
      )}

      {/* Input */}
      <View style={styles.inputContainer}>
        {/* Microphone button */}
        <TouchableOpacity
          style={[styles.micButton, isListening && styles.micButtonActive]}
          onPress={isListening ? stopListening : startListening}
          disabled={isSending}
        >
          <Ionicons
            name={isListening ? 'mic' : 'mic-outline'}
            size={22}
            color={isListening ? AppColors.white : AppColors.primary}
          />
        </TouchableOpacity>

        <TextInput
          style={styles.input}
          placeholder="Type a message..."
@@ -196,7 +394,7 @@ export default function ChatScreen() {
          onChangeText={setInput}
          multiline
          maxLength={1000}
          editable={!isSending}
          editable={!isSending && !isListening}
          onSubmitEditing={handleSend}
        />
        <TouchableOpacity
@@ -212,6 +410,82 @@ export default function ChatScreen() {
          </TouchableOpacity>
        </View>
      </KeyboardAvoidingView>

      {/* Voice Picker Modal */}
      <Modal
        visible={showVoicePicker}
        animationType="slide"
        transparent={true}
        onRequestClose={() => setShowVoicePicker(false)}
      >
        <View style={styles.modalOverlay}>
          <View style={styles.modalContent}>
            <View style={styles.modalHeader}>
              <Text style={styles.modalTitle}>Select Voice</Text>
              <TouchableOpacity
                style={styles.modalCloseButton}
                onPress={() => setShowVoicePicker(false)}
              >
                <Ionicons name="close" size={24} color={AppColors.textPrimary} />
              </TouchableOpacity>
            </View>

            <Text style={styles.modalSubtitle}>
              Neural TTS voices for Julia AI
            </Text>

            <ScrollView style={styles.voiceList}>
              {AVAILABLE_VOICES.map((voice) => (
                <TouchableOpacity
                  key={voice.id}
                  style={[
                    styles.voiceItem,
                    selectedVoice.id === voice.id && styles.voiceItemSelected,
                  ]}
                  onPress={() => handleVoiceChange(voice)}
                  disabled={isChangingVoice}
                >
                  <View style={styles.voiceIcon}>
                    <Ionicons
                      name={voice.gender === 'female' ? 'woman' : 'man'}
                      size={24}
                      color={selectedVoice.id === voice.id ? AppColors.white : AppColors.primary}
                    />
                  </View>
                  <View style={styles.voiceInfo}>
                    <Text
                      style={[
                        styles.voiceName,
                        selectedVoice.id === voice.id && styles.voiceNameSelected,
                      ]}
                    >
                      {voice.name}
                    </Text>
                    <Text
                      style={[
                        styles.voiceDescription,
                        selectedVoice.id === voice.id && styles.voiceDescriptionSelected,
                      ]}
                    >
                      {voice.description}
                    </Text>
                  </View>
                  {selectedVoice.id === voice.id && (
                    <Ionicons name="checkmark-circle" size={24} color={AppColors.white} />
                  )}
                  {isChangingVoice && selectedVoice.id !== voice.id && (
                    <ActivityIndicator size="small" color={AppColors.primary} />
                  )}
                </TouchableOpacity>
              ))}
            </ScrollView>

            <Text style={styles.voiceHint}>
              Tap a voice to hear a preview
            </Text>
          </View>
        </View>
      </Modal>
    </SafeAreaView>
  );
}
@@ -258,9 +532,33 @@ const styles = StyleSheet.create({
    fontSize: FontSizes.sm,
    color: AppColors.success,
  },
  headerButtons: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 8,
  },
  headerButton: {
    padding: Spacing.xs,
  },
  voiceModeActive: {
    flexDirection: 'row',
    alignItems: 'center',
    backgroundColor: AppColors.primary,
    borderRadius: BorderRadius.md,
    paddingHorizontal: 10,
    paddingVertical: 6,
    gap: 4,
  },
  voiceModeText: {
    color: AppColors.white,
    fontSize: FontSizes.xs,
    fontWeight: '600',
  },
  speakingIndicator: {
    backgroundColor: AppColors.success,
    borderRadius: BorderRadius.md,
    padding: 6,
  },
  chatContainer: {
    flex: 1,
  },
@@ -355,4 +653,123 @@ const styles = StyleSheet.create({
  sendButtonDisabled: {
    backgroundColor: AppColors.surface,
  },
  micButton: {
    width: 44,
    height: 44,
    borderRadius: BorderRadius.full,
    backgroundColor: AppColors.surface,
    justifyContent: 'center',
    alignItems: 'center',
    marginRight: Spacing.sm,
    borderWidth: 1,
    borderColor: AppColors.primary,
  },
  micButtonActive: {
    backgroundColor: AppColors.primary,
    borderColor: AppColors.primary,
  },
  listeningIndicator: {
    flexDirection: 'row',
    alignItems: 'center',
    paddingHorizontal: Spacing.md,
    paddingVertical: Spacing.sm,
    backgroundColor: AppColors.surface,
    borderTopWidth: 1,
    borderTopColor: AppColors.border,
  },
  listeningText: {
    flex: 1,
    marginLeft: Spacing.sm,
    fontSize: FontSizes.sm,
    color: AppColors.textSecondary,
    fontStyle: 'italic',
  },
  stopButton: {
    padding: Spacing.xs,
  },
  // Voice Picker Modal styles
  modalOverlay: {
    flex: 1,
    backgroundColor: 'rgba(0, 0, 0, 0.5)',
    justifyContent: 'flex-end',
  },
  modalContent: {
    backgroundColor: AppColors.background,
    borderTopLeftRadius: BorderRadius.xl,
    borderTopRightRadius: BorderRadius.xl,
    paddingTop: Spacing.lg,
    paddingBottom: Spacing.xl + 20,
    maxHeight: '60%',
  },
  modalHeader: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    paddingHorizontal: Spacing.lg,
    marginBottom: Spacing.sm,
  },
  modalTitle: {
    fontSize: FontSizes.xl,
    fontWeight: '600',
    color: AppColors.textPrimary,
  },
  modalCloseButton: {
    padding: Spacing.xs,
  },
  modalSubtitle: {
    fontSize: FontSizes.sm,
    color: AppColors.textSecondary,
    paddingHorizontal: Spacing.lg,
    marginBottom: Spacing.md,
  },
  voiceList: {
    paddingHorizontal: Spacing.lg,
  },
  voiceItem: {
    flexDirection: 'row',
    alignItems: 'center',
    padding: Spacing.md,
    backgroundColor: AppColors.surface,
    borderRadius: BorderRadius.lg,
    marginBottom: Spacing.sm,
    borderWidth: 2,
    borderColor: 'transparent',
  },
  voiceItemSelected: {
    borderColor: AppColors.primary,
    backgroundColor: `${AppColors.primary}15`,
  },
  voiceIcon: {
    width: 48,
    height: 48,
    borderRadius: BorderRadius.full,
    backgroundColor: AppColors.primary + '20',
    justifyContent: 'center',
    alignItems: 'center',
    marginRight: Spacing.md,
  },
  voiceInfo: {
    flex: 1,
  },
  voiceName: {
    fontSize: FontSizes.base,
    fontWeight: '600',
    color: AppColors.textPrimary,
    marginBottom: 2,
  },
  voiceNameSelected: {
    color: AppColors.primary,
  },
  voiceDescription: {
    fontSize: FontSizes.sm,
    color: AppColors.textSecondary,
  },
  voiceDescriptionSelected: {
    color: AppColors.primary,
  },
  voiceHint: {
    fontSize: FontSizes.xs,
    color: AppColors.textSecondary,
    marginTop: 4,
  },
});

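chat.tsx consumes a useTTS hook from @/hooks/useTTS that is not part of this diff. A minimal sketch of the shape implied by its call sites (speak, stop, isSpeaking), assuming it wraps the same SherpaTTS service used in voice.tsx below:

    // Hypothetical @/hooks/useTTS sketch inferred from call sites; not the committed source.
    import { useCallback, useState } from 'react';
    import SherpaTTS from '@/services/sherpaTTS';

    export function useTTS() {
      const [isSpeaking, setIsSpeaking] = useState(false);

      const speak = useCallback((text: string) => {
        setIsSpeaking(true);
        SherpaTTS.speak(text, {
          onDone: () => setIsSpeaking(false),
          onError: () => setIsSpeaking(false),
        });
      }, []);

      const stop = useCallback(() => {
        SherpaTTS.stop(); // stop any in-progress playback
        setIsSpeaking(false);
      }, []);

      return { speak, stop, isSpeaking };
    }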
@@ -21,6 +21,7 @@ import * as Speech from 'expo-speech';
import { ExpoSpeechRecognitionModule, useSpeechRecognitionEvent } from 'expo-speech-recognition';
import { useBeneficiary } from '@/contexts/BeneficiaryContext';
import { api } from '@/services/api';
import SherpaTTS from '@/services/sherpaTTS';
import { AppColors, BorderRadius, FontSizes, Spacing } from '@/constants/theme';
import type { Message, Beneficiary } from '@/types';

@@ -137,6 +138,8 @@ export default function VoiceAIScreen() {
  // DEV ONLY: Voice selection for testing
  const [selectedVoice, setSelectedVoice] = useState<VoiceOption>(AVAILABLE_VOICES[0]);
  const [showVoicePicker, setShowVoicePicker] = useState(false);
  const [sherpaTTSReady, setSherpaTTSReady] = useState(false);
  const [useNeuralTTS, setUseNeuralTTS] = useState(true); // Use Sherpa TTS by default
  const flatListRef = useRef<FlatList>(null);
  const lastSendTimeRef = useRef<number>(0);
  const pulseAnim = useRef(new Animated.Value(1)).current;
@@ -174,11 +177,33 @@ export default function VoiceAIScreen() {
    }
  });

  // Load beneficiaries on mount
  // Load beneficiaries on mount and initialize Sherpa TTS
  useEffect(() => {
    loadBeneficiaries();

    // Initialize Sherpa TTS (neural offline voice)
    const initTTS = async () => {
      try {
        console.log('[Voice] Initializing Sherpa TTS...');
        const success = await SherpaTTS.initialize();
        setSherpaTTSReady(success);
        if (success) {
          console.log('[Voice] Sherpa TTS ready - using neural voice');
        } else {
          console.log('[Voice] Sherpa TTS failed, falling back to system TTS');
          setUseNeuralTTS(false);
        }
      } catch (error) {
        console.error('[Voice] Sherpa TTS init error:', error);
        setUseNeuralTTS(false);
      }
    };
    initTTS();

    return () => {
      Speech.stop();
      SherpaTTS.stop();
      SherpaTTS.deinitialize();
    };
  }, []);

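The @/services/sherpaTTS module itself is not included in this diff; its shape can be read off the call sites (initialize/speak/stop/deinitialize here, plus the voice helpers imported in chat.tsx). A type-level sketch of that implied interface:

    // Interface implied by the call sites in this diff; the PiperVoice fields
    // come from the voice-picker JSX in chat.tsx. Not the committed source.
    export interface PiperVoice {
      id: string;
      name: string;
      description: string;
      gender: 'female' | 'male';
    }

    interface SpeakOptions {
      speed?: number;
      onStart?: () => void;
      onDone?: () => void;
      onError?: (error: unknown) => void;
    }

    declare const SherpaTTS: {
      initialize(): Promise<boolean>; // resolves false if the model fails to load
      speak(text: string, options?: SpeakOptions): Promise<void>;
      stop(): void;
      deinitialize(): void;
    };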
@@ -415,35 +440,58 @@ Based on this data, please answer the following question: ${question}`;
    }
  };

  // Text-to-Speech using expo-speech (works out of the box)
  // Text-to-Speech - uses Sherpa neural TTS if available, falls back to expo-speech
  const speakResponse = async (text: string, autoListenAfter: boolean = false) => {
    setIsSpeaking(true);
    try {
      const speechOptions: Speech.SpeechOptions = {
        language: selectedVoice.language,
        pitch: 1.0,
        rate: 0.9,
        onDone: () => {
          setIsSpeaking(false);
          if (autoListenAfter && isContinuousMode && currentBeneficiary?.id) {
            setTimeout(() => {
              startListeningInternal();
            }, 500);
          }
        },
        onError: () => setIsSpeaking(false),
      };
      // Add specific voice if available (iOS only)
      if (selectedVoice.voice) {
        speechOptions.voice = selectedVoice.voice;

    const onSpeechComplete = () => {
      setIsSpeaking(false);
      if (autoListenAfter && isContinuousMode && currentBeneficiary?.id) {
        setTimeout(() => {
          startListeningInternal();
        }, 500);
      }
    };

    try {
      // Use Sherpa TTS (neural voice) for English if available
      if (useNeuralTTS && sherpaTTSReady && selectedVoice.language.startsWith('en')) {
        console.log('[Voice] Using Sherpa neural TTS');
        await SherpaTTS.speak(text, {
          speed: 1.0,
          onStart: () => console.log('[Voice] Sherpa speaking...'),
          onDone: onSpeechComplete,
          onError: (error) => {
            console.error('[Voice] Sherpa TTS error, falling back:', error);
            // Fall back to system TTS on error
            speakWithSystemTTS(text, onSpeechComplete);
          },
        });
      } else {
        // Fall back to system TTS for non-English or if Sherpa not available
        speakWithSystemTTS(text, onSpeechComplete);
      }
      await Speech.speak(text, speechOptions);
    } catch (error) {
      console.error('TTS error:', error);
      setIsSpeaking(false);
    }
  };

  // System TTS fallback using expo-speech
  const speakWithSystemTTS = (text: string, onDone: () => void) => {
    const speechOptions: Speech.SpeechOptions = {
      language: selectedVoice.language,
      pitch: 1.0,
      rate: 0.9,
      onDone,
      onError: () => setIsSpeaking(false),
    };
    if (selectedVoice.voice) {
      speechOptions.voice = selectedVoice.voice;
    }
    Speech.speak(text, speechOptions);
  };

  // DEV: Test voice with sample text
  const testVoice = (voice: VoiceOption) => {
    Speech.stop();

15 assets/tts-models/vits-piper-en_GB-alba-medium/MODEL_CARD Normal file
@@ -0,0 +1,15 @@
# Model card for alba (medium)

* Language: en_GB (English, Great Britain)
* Speakers: 1
* Quality: medium
* Samplerate: 22,050Hz

## Dataset

* URL: https://datashare.ed.ac.uk/handle/10283/3270
* License: https://creativecommons.org/licenses/by/4.0/

## Training

Finetuned from U.S. English lessac voice (medium quality).
Binary file not shown.
@@ -0,0 +1,493 @@
{
  "audio": {
    "sample_rate": 22050,
    "quality": "medium"
  },
  "espeak": {
    "voice": "en-gb-x-rp"
  },
  "inference": {
    "noise_scale": 0.667,
    "length_scale": 1,
    "noise_w": 0.8
  },
  "phoneme_type": "espeak",
  "phoneme_map": {},
  "phoneme_id_map": {
    "_": [0], "^": [1], "$": [2], " ": [3], "!": [4], "'": [5], "(": [6], ")": [7], ",": [8], "-": [9],
    ".": [10], ":": [11], ";": [12], "?": [13],
    "a": [14], "b": [15], "c": [16], "d": [17], "e": [18], "f": [19], "h": [20], "i": [21], "j": [22],
    "k": [23], "l": [24], "m": [25], "n": [26], "o": [27], "p": [28], "q": [29], "r": [30], "s": [31],
    "t": [32], "u": [33], "v": [34], "w": [35], "x": [36], "y": [37], "z": [38],
    "æ": [39], "ç": [40], "ð": [41], "ø": [42], "ħ": [43], "ŋ": [44], "œ": [45],
    "ǀ": [46], "ǁ": [47], "ǂ": [48], "ǃ": [49],
    "ɐ": [50], "ɑ": [51], "ɒ": [52], "ɓ": [53], "ɔ": [54], "ɕ": [55], "ɖ": [56], "ɗ": [57], "ɘ": [58],
    "ə": [59], "ɚ": [60], "ɛ": [61], "ɜ": [62], "ɞ": [63], "ɟ": [64], "ɠ": [65], "ɡ": [66], "ɢ": [67],
    "ɣ": [68], "ɤ": [69], "ɥ": [70], "ɦ": [71], "ɧ": [72], "ɨ": [73], "ɪ": [74], "ɫ": [75], "ɬ": [76],
    "ɭ": [77], "ɮ": [78], "ɯ": [79], "ɰ": [80], "ɱ": [81], "ɲ": [82], "ɳ": [83], "ɴ": [84], "ɵ": [85],
    "ɶ": [86], "ɸ": [87], "ɹ": [88], "ɺ": [89], "ɻ": [90], "ɽ": [91], "ɾ": [92],
    "ʀ": [93], "ʁ": [94], "ʂ": [95], "ʃ": [96], "ʄ": [97], "ʈ": [98], "ʉ": [99], "ʊ": [100],
    "ʋ": [101], "ʌ": [102], "ʍ": [103], "ʎ": [104], "ʏ": [105], "ʐ": [106], "ʑ": [107], "ʒ": [108],
    "ʔ": [109], "ʕ": [110], "ʘ": [111], "ʙ": [112], "ʛ": [113], "ʜ": [114], "ʝ": [115], "ʟ": [116],
    "ʡ": [117], "ʢ": [118], "ʲ": [119],
    "ˈ": [120], "ˌ": [121], "ː": [122], "ˑ": [123], "˞": [124],
    "β": [125], "θ": [126], "χ": [127], "ᵻ": [128], "ⱱ": [129],
    "0": [130], "1": [131], "2": [132], "3": [133], "4": [134],
    "5": [135], "6": [136], "7": [137], "8": [138], "9": [139],
    "̧": [140], "̃": [141], "̪": [142], "̯": [143], "̩": [144],
    "ʰ": [145], "ˤ": [146], "ε": [147], "↓": [148], "#": [149],
    "\"": [150], "↑": [151], "̺": [152], "̻": [153]
  },
  "num_symbols": 256,
  "num_speakers": 1,
  "speaker_id_map": {},
  "piper_version": "1.0.0",
  "language": {
    "code": "en_GB",
    "family": "en",
    "region": "GB",
    "name_native": "English",
    "name_english": "English",
    "country_english": "Great Britain"
  },
  "dataset": "alba"
}
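The phoneme_id_map above is how Piper-style VITS models encode their input: espeak-ng converts text to IPA phonemes, each phoneme maps to an id, and Piper conventionally interleaves the pad symbol "_" (id 0) between ids, with "^" and "$" marking sentence start and end. An illustrative lookup, assuming the JSON above has been parsed (this helper is not part of the commit):

    // Illustrative only; mirrors Piper's usual phoneme-to-id encoding convention.
    type PhonemeIdMap = Record<string, number[]>;

    function phonemesToIds(phonemes: string, map: PhonemeIdMap): number[] {
      const ids: number[] = [...map['^'], ...map['_']]; // sentence start + pad
      for (const ch of phonemes) {
        const mapped = map[ch];
        if (!mapped) continue;            // skip symbols the model doesn't know
        ids.push(...mapped, ...map['_']); // phoneme id followed by pad
      }
      ids.push(...map['$']);              // sentence end
      return ids;
    }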
Binary files not shown (59 files).
@@ -0,0 +1,8 @@
name Vietnamese (Northern)
language vi

words 1 2
pitch 95 175


tone 100 225 800 100 2000 50 5400 75 8000 200
@@ -0,0 +1,9 @@
name Vietnamese (Central)
language vi-vn-x-central
phonemes vi-hue
dictrules 1

words 1
pitch 82 118 //80 118
voicing 90 //18
flutter 20
@@ -0,0 +1,9 @@
name Vietnamese (Southern)
language vi-vn-x-south
phonemes vi-sgn
dictrules 2

words 1
pitch 82 118 //80 118
voicing 90 //18
flutter 20
@@ -0,0 +1,4 @@
name Esperanto
language eo

apostrophe 2
@@ -0,0 +1,2 @@
name Interlingua
language ia
@@ -0,0 +1,5 @@
name Ido
language io
phonemes eo
status testing

@@ -0,0 +1,4 @@
name Lojban
language jbo

speed 80 // speed adjustment, percentage
@@ -0,0 +1,8 @@
name Lingua Franca Nova
language lfn

phonemes base2
l_unpronouncable 0
numbers 2 3

stressLength 150 140 180 180 0 0 200 200
@@ -0,0 +1,5 @@
name Klingon
language piqd
status testing
stressRule 3

@@ -0,0 +1,7 @@
name Pyash
language py
maintainer Logan Streondj <logan@liberit.ca>
status testing

speed 80 // speed adjustment, percentage
stressRule 0
@@ -0,0 +1,6 @@
name Lang Belta
language qdb

numbers 4 3

replace 1 t ?
@@ -0,0 +1,4 @@
name Quenya
language qya
stressRule 2
// rule=penultimate, with qya_rules for light penultimate syllables to move primary stress to the preceding (antepenultimate) syllable
@@ -0,0 +1,4 @@
name Sindarin
language sjn
stressRule 2
// rule=penultimate, with sjn_rules for light penultimate syllables to move primary stress to the preceding (antepenultimate) syllable
@@ -0,0 +1,6 @@
name Nahuatl (Classical)
language nci

intonation 3
stressRule 2
stressLength 190 190 200 200 0 0 220 240
@@ -0,0 +1,2 @@
name Lithuanian
language lt
@@ -0,0 +1,12 @@
name Latgalian
language ltg
maintainer Valdis Vitolins <valdis.vitolins@odo.lv>
status testing
phonemes lv
dictionary lv
dictrules 2 // Setting for Latgalian pronunciation
words 0 2
pitch 64 118
tone 60 150 204 100 400 255 700 10 3000 255
stressAmp 12 10 8 8 0 0 15 16
stressLength 160 140 200 140 0 0 240 160
@@ -0,0 +1,9 @@
name Latvian
language lv
maintainer Valdis Vitolins <valdis.vitolins@odo.lv>
status mature
words 0 2
pitch 67 123
tone 60 150 204 100 400 255 700 10 3000 255
stressAmp 11 8 11 9 0 0 14 12
stressLength 160 120 200 130 0 0 230 180
@@ -0,0 +1,4 @@
name Swahili
language sw

status testing
@@ -0,0 +1,4 @@
name Setswana
language tn

status testing
@@ -0,0 +1,3 @@
name Georgian
language ka
lowercaseSentence // A period followed by a lowercase letter is considered a sentence (mkhedruli)
@@ -0,0 +1,4 @@
name Welsh
language cy

intonation 4
@@ -0,0 +1,4 @@
name Gaelic (Irish)
language ga

dictrules 1 // fix for eclipsis
@@ -0,0 +1,4 @@
name Gaelic (Scottish)
language gd

status testing
@@ -0,0 +1,4 @@
name Oromo
language om

status testing
@@ -0,0 +1,5 @@
name Kannada
language kn

intonation 2
//consonants 80
@@ -0,0 +1,5 @@
name Malayalam
language ml

intonation 2
//consonants 80
@@ -0,0 +1,5 @@
name Tamil
language ta

intonation 2
consonants 80
@@ -0,0 +1,7 @@
name Telugu
language te

status testing

intonation 2
//consonants 80
@@ -0,0 +1,3 @@
name Greenlandic
language kl

@@ -0,0 +1,5 @@
name Basque
language eu

status testing
stressRule 15
@@ -0,0 +1,4 @@
name Danish
language da

tunes s2 c2 q2 e2
@@ -0,0 +1,2 @@
name Icelandic
language is
@@ -0,0 +1,7 @@
name Norwegian Bokmål
language nb
language no
phonemes no
dictionary no

intonation 4
Some files were not shown because too many files have changed in this diff