Compare commits
26 Commits
5b5cdf1098
...
6d339acc64
| SHA1 |
|---|
| 6d339acc64 |
| 3ef1d8e54c |
| bdb4ceb8d2 |
| 5efd696ef2 |
| 45f2b676e0 |
| 88d4afcdfd |
| 8f64a6e6af |
| 59f1f088ed |
| 3c7a48df5b |
| dbf6a8a74a |
| 764c149e2e |
| 1c23ca41b8 |
| 356205d8c0 |
| 66a8395602 |
| 9b4d39fdc5 |
| 6abc1f0382 |
| cc89c2d154 |
| 62eb7c4de0 |
| 54bff8d9d5 |
| caf47ead9c |
| bd12aadfb3 |
| 260a722cd9 |
| 432964c4d0 |
| 3c58ff20f9 |
| de2563fec6 |
| ef533de4d5 |
app.json (13)

@@ -1,8 +1,8 @@
{
  "expo": {
    "name": "WellNuo",
    "name": "WellNuo Lite",
    "slug": "WellNuo",
    "version": "1.0.5",
    "version": "1.0.6",
    "orientation": "portrait",
    "icon": "./assets/images/icon.png",
    "scheme": "wellnuo",
@@ -55,8 +55,13 @@
    "favicon": "./assets/images/favicon.png"
  },
  "plugins": [
    "@livekit/react-native-expo-plugin",
    "@config-plugins/react-native-webrtc",
    [
      "@jamsch/expo-speech-recognition",
      {
        "microphonePermission": "WellNuo needs access to your microphone to listen to your voice commands.",
        "speechRecognitionPermission": "WellNuo uses speech recognition to convert your voice to text for Julia AI."
      }
    ],
    "expo-router",
    [
      "expo-splash-screen",
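The two permission strings above cover only the config-plugin side; the app still has to request microphone and speech-recognition access at runtime before starting STT. A minimal sketch, assuming `@jamsch/expo-speech-recognition` exposes `ExpoSpeechRecognitionModule.requestPermissionsAsync()` (the module and method names are assumptions based on the Expo-module convention; verify against the library's current API):

```typescript
import { ExpoSpeechRecognitionModule } from '@jamsch/expo-speech-recognition';

// Request microphone + speech recognition permission before starting a voice session.
// Module and method names here are assumptions; check the library's docs.
export async function ensureSpeechPermissions(): Promise<boolean> {
  const result = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
  if (!result.granted) {
    console.warn('[Voice] Speech recognition permission was not granted');
  }
  return result.granted;
}
```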
@ -1,17 +1,225 @@
|
||||
import { Tabs } from 'expo-router';
|
||||
import React from 'react';
|
||||
import { Platform } from 'react-native';
|
||||
import React, { useCallback, useEffect, useRef } from 'react';
|
||||
import { Platform, View, AppState, AppStateStatus } from 'react-native';
|
||||
import { Feather } from '@expo/vector-icons';
|
||||
import { useSafeAreaInsets } from 'react-native-safe-area-context';
|
||||
|
||||
import { HapticTab } from '@/components/haptic-tab';
|
||||
import { VoiceFAB } from '@/components/VoiceFAB';
|
||||
import { AppColors } from '@/constants/theme';
|
||||
import { useColorScheme } from '@/hooks/use-color-scheme';
|
||||
import { useVoiceCall } from '@/contexts/VoiceCallContext';
|
||||
import { useVoice } from '@/contexts/VoiceContext';
|
||||
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
|
||||
|
||||
export default function TabLayout() {
|
||||
const colorScheme = useColorScheme();
|
||||
const isDark = colorScheme === 'dark';
|
||||
const insets = useSafeAreaInsets();
|
||||
// VoiceFAB uses VoiceCallContext internally to hide when call is active
|
||||
useVoiceCall(); // Ensure context is available
|
||||
|
||||
// Voice context for listening mode toggle and TTS interruption
|
||||
const {
|
||||
isListening,
|
||||
isSpeaking,
|
||||
status,
|
||||
startSession,
|
||||
stopSession,
|
||||
interruptIfSpeaking,
|
||||
setTranscript,
|
||||
setPartialTranscript,
|
||||
sendTranscript,
|
||||
} = useVoice();
|
||||
|
||||
// Track whether session is active (listening mode on, even during TTS)
|
||||
const sessionActiveRef = useRef(false);
|
||||
// Track if we need to restart STT after it ends during active session
|
||||
const shouldRestartSTTRef = useRef(false);
|
||||
// Track pending transcript from interruption (to send after TTS stops)
|
||||
const pendingInterruptTranscriptRef = useRef<string | null>(null);
|
||||
|
||||
// Callback for voice detection - interrupt TTS when user speaks
|
||||
const handleVoiceDetected = useCallback(() => {
|
||||
// Interrupt TTS when user starts speaking during 'speaking' state
|
||||
if (status === 'speaking' || isSpeaking) {
|
||||
console.log('[TabLayout] Voice detected during TTS playback - INTERRUPTING Julia');
|
||||
const wasInterrupted = interruptIfSpeaking();
|
||||
if (wasInterrupted) {
|
||||
console.log('[TabLayout] TTS interrupted successfully, now listening to user');
|
||||
}
|
||||
}
|
||||
}, [status, isSpeaking, interruptIfSpeaking]);
|
||||
|
||||
// Callback when STT ends - may need to restart if session is still active
|
||||
const handleSTTEnd = useCallback(() => {
|
||||
console.log('[TabLayout] STT ended, sessionActive:', sessionActiveRef.current);
|
||||
// If session is still active (user didn't stop it), we should restart STT
|
||||
// This ensures STT continues during and after TTS playback
|
||||
if (sessionActiveRef.current) {
|
||||
shouldRestartSTTRef.current = true;
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Callback for STT results
|
||||
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
|
||||
if (isFinal) {
|
||||
// Check if we're still in speaking mode (user interrupted Julia)
|
||||
if (isSpeaking || status === 'speaking') {
|
||||
// Store the transcript to send after TTS fully stops
|
||||
console.log('[TabLayout] Got final result while TTS playing - storing for after interruption:', transcript);
|
||||
pendingInterruptTranscriptRef.current = transcript;
|
||||
} else {
|
||||
// Normal case: not speaking, send immediately
|
||||
setTranscript(transcript);
|
||||
sendTranscript(transcript);
|
||||
}
|
||||
} else {
|
||||
setPartialTranscript(transcript);
|
||||
}
|
||||
}, [setTranscript, setPartialTranscript, sendTranscript, isSpeaking, status]);
|
||||
|
||||
// Speech recognition with voice detection callback
|
||||
const {
|
||||
startListening,
|
||||
stopListening,
|
||||
isListening: sttIsListening,
|
||||
} = useSpeechRecognition({
|
||||
lang: 'ru-RU',
|
||||
continuous: true,
|
||||
interimResults: true,
|
||||
onVoiceDetected: handleVoiceDetected,
|
||||
onResult: handleSpeechResult,
|
||||
onEnd: handleSTTEnd,
|
||||
});
|
||||
|
||||
// Update session active ref when isListening changes
|
||||
useEffect(() => {
|
||||
sessionActiveRef.current = isListening;
|
||||
if (!isListening) {
|
||||
shouldRestartSTTRef.current = false;
|
||||
}
|
||||
}, [isListening]);
|
||||
|
||||
// Start/stop STT when voice session starts/stops
|
||||
useEffect(() => {
|
||||
if (isListening) {
|
||||
console.log('[TabLayout] Starting STT for voice session');
|
||||
startListening();
|
||||
} else {
|
||||
console.log('[TabLayout] Stopping STT - session ended');
|
||||
stopListening();
|
||||
}
|
||||
}, [isListening, startListening, stopListening]);
|
||||
|
||||
// Restart STT if it ended while session is still active
|
||||
// This ensures continuous listening even during/after TTS playback
|
||||
useEffect(() => {
|
||||
if (shouldRestartSTTRef.current && sessionActiveRef.current && !sttIsListening) {
|
||||
console.log('[TabLayout] Restarting STT - session still active');
|
||||
shouldRestartSTTRef.current = false;
|
||||
// Small delay to ensure clean restart
|
||||
const timer = setTimeout(() => {
|
||||
if (sessionActiveRef.current) {
|
||||
startListening();
|
||||
}
|
||||
}, 100);
|
||||
return () => clearTimeout(timer);
|
||||
}
|
||||
}, [sttIsListening, startListening]);
|
||||
|
||||
// Track previous status to detect transition from speaking to listening
|
||||
const prevStatusRef = useRef<typeof status>('idle');
|
||||
|
||||
// Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening')
|
||||
// Also process any pending transcript from user interruption
|
||||
useEffect(() => {
|
||||
const prevStatus = prevStatusRef.current;
|
||||
prevStatusRef.current = status;
|
||||
|
||||
// When transitioning from speaking to listening, handle pending interrupt transcript
|
||||
if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) {
|
||||
console.log('[TabLayout] TTS finished/interrupted - checking for pending transcript');
|
||||
|
||||
// Process pending transcript from interruption if any
|
||||
const pendingTranscript = pendingInterruptTranscriptRef.current;
|
||||
if (pendingTranscript) {
|
||||
console.log('[TabLayout] Processing pending interrupt transcript:', pendingTranscript);
|
||||
pendingInterruptTranscriptRef.current = null;
|
||||
setTranscript(pendingTranscript);
|
||||
sendTranscript(pendingTranscript);
|
||||
}
|
||||
|
||||
// Small delay to ensure TTS cleanup is complete, then restart STT
|
||||
const timer = setTimeout(() => {
|
||||
if (sessionActiveRef.current && !sttIsListening) {
|
||||
startListening();
|
||||
}
|
||||
}, 200);
|
||||
return () => clearTimeout(timer);
|
||||
}
|
||||
}, [status, sttIsListening, startListening, setTranscript, sendTranscript]);
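// Summary of the interruption flow implemented above (descriptive only, no new logic):
// 1. While Julia's TTS is playing (status === 'speaking'), STT keeps running in parallel.
// 2. onVoiceDetected fires -> handleVoiceDetected() calls interruptIfSpeaking() to stop TTS.
// 3. A final STT result that arrives during playback is parked in
//    pendingInterruptTranscriptRef instead of being sent immediately.
// 4. When status transitions 'speaking' -> 'listening', the parked transcript is sent via
//    setTranscript()/sendTranscript() and STT is restarted after a short delay.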
|
||||
|
||||
// ============================================================================
|
||||
// TAB NAVIGATION PERSISTENCE
|
||||
// Ensure voice session continues when user switches between tabs.
|
||||
// The session state is in VoiceContext (root level), but STT may stop due to:
|
||||
// 1. Native audio session changes
|
||||
// 2. Tab unmount/remount (though tabs layout doesn't unmount)
|
||||
// 3. AppState changes (background/foreground)
|
||||
// ============================================================================
|
||||
|
||||
// Monitor and recover STT state during tab navigation
|
||||
// If session is active but STT stopped unexpectedly, restart it
|
||||
// IMPORTANT: STT should run DURING TTS playback to detect user interruption!
|
||||
useEffect(() => {
|
||||
// Check every 500ms if STT needs to be restarted
|
||||
const intervalId = setInterval(() => {
|
||||
// Only act if session should be active (isListening from VoiceContext)
|
||||
// but STT is not actually listening
|
||||
// Note: We DO want STT running during 'speaking' to detect interruption!
|
||||
// Only skip during 'processing' (API call in progress)
|
||||
if (
|
||||
sessionActiveRef.current &&
|
||||
!sttIsListening &&
|
||||
status !== 'processing'
|
||||
) {
|
||||
console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped, status:', status, ')');
|
||||
startListening();
|
||||
}
|
||||
}, 500);
|
||||
|
||||
return () => clearInterval(intervalId);
|
||||
}, [sttIsListening, status, startListening]);
|
||||
|
||||
// Handle app state changes (background/foreground)
|
||||
// When app comes back to foreground, restart STT if session was active
|
||||
useEffect(() => {
|
||||
const handleAppStateChange = (nextAppState: AppStateStatus) => {
|
||||
if (nextAppState === 'active' && sessionActiveRef.current) {
|
||||
// App came to foreground, give it a moment then check STT
|
||||
// STT should run even during 'speaking' to detect user interruption
|
||||
setTimeout(() => {
|
||||
if (sessionActiveRef.current && !sttIsListening && status !== 'processing') {
|
||||
console.log('[TabLayout] App foregrounded - restarting STT');
|
||||
startListening();
|
||||
}
|
||||
}, 300);
|
||||
}
|
||||
};
|
||||
|
||||
const subscription = AppState.addEventListener('change', handleAppStateChange);
|
||||
return () => subscription.remove();
|
||||
}, [sttIsListening, status, startListening]);
|
||||
|
||||
// Handle voice FAB press - toggle listening mode
|
||||
const handleVoiceFABPress = useCallback(() => {
|
||||
if (isListening) {
|
||||
stopSession();
|
||||
} else {
|
||||
startSession();
|
||||
}
|
||||
}, [isListening, startSession, stopSession]);
|
||||
|
||||
// Calculate tab bar height based on safe area
|
||||
// On iOS with home indicator, insets.bottom is ~34px
|
||||
@ -24,6 +232,7 @@ export default function TabLayout() {
|
||||
const tabBarHeight = 60 + bottomPadding; // 60px for content + safe area padding
|
||||
|
||||
return (
|
||||
<View style={{ flex: 1 }}>
|
||||
<Tabs
|
||||
screenOptions={{
|
||||
tabBarActiveTintColor: AppColors.primary,
|
||||
@ -85,6 +294,13 @@ export default function TabLayout() {
|
||||
href: null,
|
||||
}}
|
||||
/>
|
||||
{/* Audio Debug - hidden */}
|
||||
<Tabs.Screen
|
||||
name="audio-debug"
|
||||
options={{
|
||||
href: null,
|
||||
}}
|
||||
/>
|
||||
{/* Beneficiaries - hidden from tab bar but keeps tab bar visible */}
|
||||
<Tabs.Screen
|
||||
name="beneficiaries"
|
||||
@ -93,5 +309,9 @@ export default function TabLayout() {
|
||||
}}
|
||||
/>
|
||||
</Tabs>
|
||||
|
||||
{/* Voice FAB - toggle listening mode */}
|
||||
<VoiceFAB onPress={handleVoiceFABPress} isListening={isListening} />
|
||||
</View>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
/**
 * Chat Screen - Text Chat with Julia AI
 *
 * Clean text chat interface with integrated voice calls.
 * Clean text chat interface.
 */

import React, { useState, useCallback, useRef, useEffect } from 'react';
@@ -17,39 +17,18 @@ import {
  Keyboard,
  Platform,
  Alert,
  Animated,
  ScrollView,
} from 'react-native';
|
||||
import * as Clipboard from 'expo-clipboard';
|
||||
import { KeyboardAvoidingView } from 'react-native-keyboard-controller';
|
||||
import { Ionicons } from '@expo/vector-icons';
|
||||
import { SafeAreaView } from 'react-native-safe-area-context';
|
||||
import { useRouter, useFocusEffect } from 'expo-router';
|
||||
import { activateKeepAwakeAsync, deactivateKeepAwake } from 'expo-keep-awake';
|
||||
import { api } from '@/services/api';
|
||||
import { useBeneficiary } from '@/contexts/BeneficiaryContext';
|
||||
import { useVoiceTranscript } from '@/contexts/VoiceTranscriptContext';
|
||||
import { useVoiceCall } from '@/contexts/VoiceCallContext';
|
||||
import { useTextToSpeech } from '@/hooks/useTextToSpeech';
|
||||
import { AppColors, BorderRadius, FontSizes, Spacing } from '@/constants/theme';
|
||||
import type { Message, Beneficiary } from '@/types';
|
||||
|
||||
// LiveKit imports
|
||||
import {
|
||||
registerGlobals,
|
||||
LiveKitRoom,
|
||||
useVoiceAssistant,
|
||||
useConnectionState,
|
||||
useTrackTranscription,
|
||||
useTracks,
|
||||
} from '@livekit/react-native';
|
||||
import { ConnectionState, Track } from 'livekit-client';
|
||||
import { getToken, type BeneficiaryData } from '@/services/livekitService';
|
||||
import { useAuth } from '@/contexts/AuthContext';
|
||||
import { getAvailableAudioOutputs, selectAudioOutput, setAudioOutput } from '@/utils/audioSession';
|
||||
|
||||
// Register LiveKit globals (must be called before using LiveKit)
|
||||
registerGlobals();
|
||||
|
||||
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
|
||||
|
||||
// WellNuo API credentials (same as julia-agent)
|
||||
@ -129,165 +108,22 @@ function normalizeQuestion(userMessage: string): string {
|
||||
return userMessage;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Voice Call Transcript Handler (invisible - just captures transcripts)
|
||||
// ============================================================================
|
||||
|
||||
interface VoiceCallTranscriptHandlerProps {
|
||||
onTranscript: (role: 'user' | 'assistant', text: string) => void;
|
||||
onDurationUpdate: (seconds: number) => void;
|
||||
onLog?: (message: string) => void;
|
||||
}
|
||||
|
||||
// Debug log entry type
|
||||
interface DebugLogEntry {
|
||||
id: string;
|
||||
timestamp: string;
|
||||
level: 'info' | 'warn' | 'error' | 'success';
|
||||
message: string;
|
||||
}
|
||||
|
||||
function VoiceCallTranscriptHandler({ onTranscript, onDurationUpdate, onLog }: VoiceCallTranscriptHandlerProps) {
|
||||
const connectionState = useConnectionState();
|
||||
const { audioTrack, state: agentState } = useVoiceAssistant();
|
||||
const [callDuration, setCallDuration] = useState(0);
|
||||
const [lastProcessedId, setLastProcessedId] = useState<string | null>(null);
|
||||
const prevConnectionStateRef = useRef<ConnectionState | null>(null);
|
||||
const prevAgentStateRef = useRef<string | null>(null);
|
||||
|
||||
// Track all audio tracks for transcription
|
||||
const tracks = useTracks([Track.Source.Microphone, Track.Source.Unknown], { onlySubscribed: false });
|
||||
|
||||
// Get transcription from agent's audio track
|
||||
const { segments: agentSegments } = useTrackTranscription(audioTrack);
|
||||
|
||||
// Get transcription from user's microphone
|
||||
const localTrack = tracks.find(t => t.participant?.isLocal);
|
||||
const { segments: userSegments } = useTrackTranscription(localTrack);
|
||||
|
||||
// Log connection state changes
|
||||
useEffect(() => {
|
||||
if (prevConnectionStateRef.current !== connectionState) {
|
||||
const msg = `Connection: ${prevConnectionStateRef.current || 'initial'} -> ${connectionState}`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
prevConnectionStateRef.current = connectionState;
|
||||
}
|
||||
}, [connectionState, onLog]);
|
||||
|
||||
// Log agent state changes
|
||||
useEffect(() => {
|
||||
if (agentState && prevAgentStateRef.current !== agentState) {
|
||||
const msg = `Agent state: ${prevAgentStateRef.current || 'initial'} -> ${agentState}`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
prevAgentStateRef.current = agentState;
|
||||
}
|
||||
}, [agentState, onLog]);
|
||||
|
||||
// Log audio track info
|
||||
useEffect(() => {
|
||||
if (audioTrack) {
|
||||
// audioTrack may have different properties depending on LiveKit version
|
||||
const trackInfo = JSON.stringify({
|
||||
hasTrack: !!audioTrack,
|
||||
publication: (audioTrack as any)?.publication?.sid || 'no-pub',
|
||||
trackSid: (audioTrack as any)?.sid || (audioTrack as any)?.trackSid || 'unknown',
|
||||
});
|
||||
const msg = `Audio track received: ${trackInfo}`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
}
|
||||
}, [audioTrack, onLog]);
|
||||
|
||||
// Log all tracks
|
||||
useEffect(() => {
|
||||
if (tracks.length > 0) {
|
||||
const trackInfo = tracks.map(t => {
|
||||
const participant = t.participant?.identity || 'unknown';
|
||||
const source = t.source || 'unknown';
|
||||
const isLocal = t.participant?.isLocal ? 'local' : 'remote';
|
||||
return `${participant}(${isLocal}):${source}`;
|
||||
}).join(', ');
|
||||
const msg = `Tracks (${tracks.length}): ${trackInfo}`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
}
|
||||
}, [tracks, onLog]);
|
||||
|
||||
// Process agent transcription
|
||||
useEffect(() => {
|
||||
if (agentSegments && agentSegments.length > 0) {
|
||||
const lastSegment = agentSegments[agentSegments.length - 1];
|
||||
if (lastSegment && lastSegment.final && lastSegment.id !== lastProcessedId) {
|
||||
setLastProcessedId(lastSegment.id);
|
||||
onTranscript('assistant', lastSegment.text);
|
||||
const msg = `Julia said: "${lastSegment.text}"`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
}
|
||||
}
|
||||
}, [agentSegments, lastProcessedId, onTranscript, onLog]);
|
||||
|
||||
// Process user transcription
|
||||
const [lastUserSegmentId, setLastUserSegmentId] = useState<string | null>(null);
|
||||
useEffect(() => {
|
||||
if (userSegments && userSegments.length > 0) {
|
||||
const lastSegment = userSegments[userSegments.length - 1];
|
||||
if (lastSegment && lastSegment.final && lastSegment.id !== lastUserSegmentId) {
|
||||
setLastUserSegmentId(lastSegment.id);
|
||||
onTranscript('user', lastSegment.text);
|
||||
const msg = `User said: "${lastSegment.text}"`;
|
||||
console.log('[VoiceCall]', msg);
|
||||
onLog?.(msg);
|
||||
}
|
||||
}
|
||||
}, [userSegments, lastUserSegmentId, onTranscript, onLog]);
|
||||
|
||||
// Call duration timer - use ref to avoid state updates during render
|
||||
const durationRef = useRef(0);
|
||||
useEffect(() => {
|
||||
if (connectionState === ConnectionState.Connected) {
|
||||
const interval = setInterval(() => {
|
||||
durationRef.current += 1;
|
||||
onDurationUpdate(durationRef.current);
|
||||
}, 1000);
|
||||
return () => clearInterval(interval);
|
||||
}
|
||||
}, [connectionState, onDurationUpdate]);
|
||||
|
||||
// Keep screen awake during call
|
||||
useEffect(() => {
|
||||
activateKeepAwakeAsync('voice-call');
|
||||
return () => {
|
||||
deactivateKeepAwake('voice-call');
|
||||
};
|
||||
}, []);
|
||||
|
||||
// This component renders nothing - it just handles transcripts
|
||||
return null;
|
||||
}
|
||||
|
||||
export default function ChatScreen() {
|
||||
const router = useRouter();
|
||||
const { currentBeneficiary, setCurrentBeneficiary } = useBeneficiary();
|
||||
const { addTranscriptEntry, clearTranscript } = useVoiceTranscript();
|
||||
const { user } = useAuth();
|
||||
const {
|
||||
callState,
|
||||
startCall,
|
||||
endCall: endVoiceCallContext,
|
||||
minimizeCall,
|
||||
maximizeCall,
|
||||
updateDuration,
|
||||
isCallActive,
|
||||
} = useVoiceCall();
|
||||
const { transcript, hasNewTranscript, markTranscriptAsShown, getTranscriptAsMessages } = useVoiceTranscript();
|
||||
|
||||
// TTS for reading Julia's responses aloud
|
||||
const { speak, stop: stopTTS, isSpeaking } = useTextToSpeech({
|
||||
language: 'ru-RU',
|
||||
rate: 1.0,
|
||||
});
|
||||
|
||||
// Helper to create initial message with beneficiary name
|
||||
const createInitialMessage = useCallback((beneficiaryName?: string | null): Message => ({
|
||||
id: '1',
|
||||
role: 'assistant',
|
||||
content: `Hello! I'm Julia, your AI wellness companion.${beneficiaryName ? `\n\nI'm here to help you monitor ${beneficiaryName}.` : ''}\n\nTap the phone button to start a voice call, or type a message below.`,
|
||||
content: `Hello! I'm Julia, your AI wellness companion.${beneficiaryName ? `\n\nI'm here to help you monitor ${beneficiaryName}.` : ''}\n\nType a message below to chat with me.`,
|
||||
timestamp: new Date(),
|
||||
}), []);
|
||||
|
||||
@ -299,84 +135,6 @@ export default function ChatScreen() {
|
||||
const [messages, setMessages] = useState<Message[]>([createInitialMessage(null)]);
|
||||
const [sortNewestFirst, setSortNewestFirst] = useState(false);
|
||||
|
||||
// Voice call state (local connecting state only)
|
||||
const [isConnectingVoice, setIsConnectingVoice] = useState(false);
|
||||
|
||||
// Debug logs state
|
||||
const [debugLogs, setDebugLogs] = useState<DebugLogEntry[]>([]);
|
||||
const [showDebugPanel, setShowDebugPanel] = useState(false);
|
||||
const debugLogIdRef = useRef(0);
|
||||
|
||||
// Add debug log entry
|
||||
const addDebugLog = useCallback((message: string, level: DebugLogEntry['level'] = 'info') => {
|
||||
const now = new Date();
|
||||
const timestamp = now.toLocaleTimeString('en-US', {
|
||||
hour12: false,
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
second: '2-digit',
|
||||
}) + '.' + now.getMilliseconds().toString().padStart(3, '0');
|
||||
|
||||
const entry: DebugLogEntry = {
|
||||
id: `log-${++debugLogIdRef.current}`,
|
||||
timestamp,
|
||||
level,
|
||||
message,
|
||||
};
|
||||
setDebugLogs(prev => [...prev.slice(-100), entry]); // Keep last 100 logs
|
||||
}, []);
|
||||
|
||||
// Copy logs to clipboard
|
||||
const copyLogsToClipboard = useCallback(async () => {
|
||||
const logsText = debugLogs.map(log => `[${log.timestamp}] ${log.level.toUpperCase()}: ${log.message}`).join('\n');
|
||||
await Clipboard.setStringAsync(logsText);
|
||||
Alert.alert('Copied', `${debugLogs.length} log entries copied to clipboard`);
|
||||
}, [debugLogs]);
|
||||
|
||||
// Clear debug logs
|
||||
const clearDebugLogs = useCallback(() => {
|
||||
setDebugLogs([]);
|
||||
addDebugLog('Logs cleared', 'info');
|
||||
}, [addDebugLog]);
|
||||
|
||||
// Pulsing animation for active call
|
||||
const pulseAnim = useRef(new Animated.Value(1)).current;
|
||||
|
||||
// Start pulsing animation when call is active
|
||||
useEffect(() => {
|
||||
if (isCallActive) {
|
||||
const pulse = Animated.loop(
|
||||
Animated.sequence([
|
||||
Animated.timing(pulseAnim, {
|
||||
toValue: 1.15,
|
||||
duration: 600,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
Animated.timing(pulseAnim, {
|
||||
toValue: 1,
|
||||
duration: 600,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
])
|
||||
);
|
||||
pulse.start();
|
||||
return () => pulse.stop();
|
||||
} else {
|
||||
pulseAnim.setValue(1);
|
||||
}
|
||||
}, [isCallActive, pulseAnim]);
|
||||
|
||||
// Track if we've shown the voice call separator for current call
|
||||
const [hasShownVoiceSeparator, setHasShownVoiceSeparator] = useState(false);
|
||||
|
||||
// Reset separator flag when starting a new call
|
||||
useEffect(() => {
|
||||
if (isCallActive && !hasShownVoiceSeparator) {
|
||||
// Will show separator on first voice message
|
||||
} else if (!isCallActive) {
|
||||
setHasShownVoiceSeparator(false);
|
||||
}
|
||||
}, [isCallActive]);
|
||||
const [input, setInput] = useState('');
|
||||
const [isSending, setIsSending] = useState(false);
|
||||
const inputRef = useRef('');
|
||||
@ -429,20 +187,15 @@ export default function ChatScreen() {
|
||||
old: previousDeploymentIdRef.current,
|
||||
new: customDeploymentId,
|
||||
name: deploymentName,
|
||||
isCallActive,
|
||||
});
|
||||
|
||||
// End any active call
|
||||
endVoiceCallContext();
|
||||
|
||||
// Clear chat with new initial message (use name instead of ID)
|
||||
setMessages([createInitialMessage(deploymentName)]);
|
||||
setHasShownVoiceSeparator(false);
|
||||
|
||||
// Update ref
|
||||
previousDeploymentIdRef.current = customDeploymentId;
|
||||
}
|
||||
}, [customDeploymentId, deploymentName, createInitialMessage, isCallActive, endVoiceCallContext]);
|
||||
}, [customDeploymentId, deploymentName, createInitialMessage]);
|
||||
|
||||
// Update initial message when deploymentName is loaded (but only if chat has just the initial message)
|
||||
useEffect(() => {
|
||||
@ -451,6 +204,26 @@ export default function ChatScreen() {
|
||||
}
|
||||
}, [deploymentName, createInitialMessage]);
|
||||
|
||||
// Add voice transcript messages to chat when new ones arrive
|
||||
useEffect(() => {
|
||||
if (hasNewTranscript && transcript.length > 0) {
|
||||
const voiceMessages = getTranscriptAsMessages();
|
||||
if (voiceMessages.length > 0) {
|
||||
setMessages(prev => {
|
||||
// Filter out messages that are already in the chat (by id)
|
||||
const existingIds = new Set(prev.map(m => m.id));
|
||||
const newMessages = voiceMessages.filter(m => !existingIds.has(m.id));
|
||||
if (newMessages.length > 0) {
|
||||
console.log('[Chat] Adding', newMessages.length, 'voice messages to chat');
|
||||
return [...prev, ...newMessages];
|
||||
}
|
||||
return prev;
|
||||
});
|
||||
}
|
||||
markTranscriptAsShown();
|
||||
}
|
||||
}, [hasNewTranscript, transcript, getTranscriptAsMessages, markTranscriptAsShown]);
|
||||
|
||||
// Load beneficiaries
|
||||
const loadBeneficiaries = useCallback(async () => {
|
||||
setLoadingBeneficiaries(true);
|
||||
@ -516,161 +289,6 @@ export default function ChatScreen() {
|
||||
setShowBeneficiaryPicker(false);
|
||||
}, [setCurrentBeneficiary]);
|
||||
|
||||
// ============================================================================
|
||||
// Voice Call Functions
|
||||
// ============================================================================
|
||||
|
||||
// Start voice call
|
||||
const startVoiceCall = useCallback(async () => {
|
||||
if (isConnectingVoice || isCallActive) return;
|
||||
|
||||
setIsConnectingVoice(true);
|
||||
addDebugLog('Starting voice call...', 'info');
|
||||
console.log('[Chat] Starting voice call...');
|
||||
|
||||
try {
|
||||
// Build beneficiary data for the agent
|
||||
// Priority: customDeploymentId from settings > currentBeneficiary > first beneficiary > fallback
|
||||
const beneficiaryData: BeneficiaryData = {
|
||||
deploymentId: customDeploymentId || currentBeneficiary?.id?.toString() || beneficiaries[0]?.id?.toString() || '21',
|
||||
beneficiaryNamesDict: {},
|
||||
};
|
||||
addDebugLog(`Deployment ID: ${beneficiaryData.deploymentId}`, 'info');
|
||||
|
||||
// Add names dict if not in single deployment mode
|
||||
if (!SINGLE_DEPLOYMENT_MODE) {
|
||||
beneficiaries.forEach(b => {
|
||||
beneficiaryData.beneficiaryNamesDict[b.id.toString()] = b.name;
|
||||
});
|
||||
}
|
||||
|
||||
// Get LiveKit token
|
||||
addDebugLog('Requesting LiveKit token...', 'info');
|
||||
const userIdStr = user?.user_id?.toString() || 'user-' + Date.now();
|
||||
const tokenResponse = await getToken(userIdStr, beneficiaryData);
|
||||
|
||||
if (!tokenResponse.success || !tokenResponse.data) {
|
||||
throw new Error(tokenResponse.error || 'Failed to get voice token');
|
||||
}
|
||||
|
||||
addDebugLog(`Token received! Room: ${tokenResponse.data.roomName}`, 'success');
|
||||
addDebugLog(`WS URL: ${tokenResponse.data.wsUrl}`, 'info');
|
||||
console.log('[Chat] Got voice token, connecting to room:', tokenResponse.data.roomName);
|
||||
|
||||
// Add call start message to chat
|
||||
const callStartMessage: Message = {
|
||||
id: `call-start-${Date.now()}`,
|
||||
role: 'assistant',
|
||||
content: 'Voice call started',
|
||||
timestamp: new Date(),
|
||||
isSystem: true,
|
||||
};
|
||||
setMessages(prev => [...prev, callStartMessage]);
|
||||
|
||||
// Clear previous transcript and start call via context
|
||||
clearTranscript();
|
||||
addDebugLog('Calling startCall with token and wsUrl...', 'info');
|
||||
startCall({
|
||||
token: tokenResponse.data.token,
|
||||
wsUrl: tokenResponse.data.wsUrl,
|
||||
beneficiaryName: currentBeneficiary?.name,
|
||||
beneficiaryId: currentBeneficiary?.id?.toString(),
|
||||
});
|
||||
addDebugLog('startCall called, waiting for LiveKitRoom to connect...', 'success');
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
|
||||
addDebugLog(`Voice call error: ${errorMsg}`, 'error');
|
||||
console.error('[Chat] Voice call error:', error);
|
||||
Alert.alert(
|
||||
'Voice Call Error',
|
||||
error instanceof Error ? error.message : 'Failed to start voice call'
|
||||
);
|
||||
} finally {
|
||||
setIsConnectingVoice(false);
|
||||
}
|
||||
}, [isConnectingVoice, isCallActive, currentBeneficiary, beneficiaries, user, clearTranscript, startCall, customDeploymentId, addDebugLog]);
|
||||
|
||||
// End voice call and log to chat
|
||||
const endVoiceCall = useCallback(() => {
|
||||
console.log('[Chat] Ending voice call...');
|
||||
|
||||
// Add call end message to chat with duration
|
||||
const duration = callState.callDuration;
|
||||
const minutes = Math.floor(duration / 60);
|
||||
const seconds = duration % 60;
|
||||
const durationStr = `${minutes}:${seconds.toString().padStart(2, '0')}`;
|
||||
|
||||
const callEndMessage: Message = {
|
||||
id: `call-end-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
role: 'assistant',
|
||||
content: `Call ended (${durationStr})`,
|
||||
timestamp: new Date(),
|
||||
isSystem: true,
|
||||
};
|
||||
setMessages(prev => [...prev, callEndMessage]);
|
||||
setHasShownVoiceSeparator(false);
|
||||
|
||||
endVoiceCallContext();
|
||||
}, [endVoiceCallContext, callState.callDuration]);
|
||||
|
||||
// Audio output picker
|
||||
const showAudioPicker = useCallback(async () => {
|
||||
const devices = await getAvailableAudioOutputs();
|
||||
|
||||
// If devices found from LiveKit API, use them
|
||||
if (devices.length > 0) {
|
||||
const buttons: any[] = devices.map(device => ({
|
||||
text: device.name,
|
||||
onPress: () => selectAudioOutput(device.id),
|
||||
}));
|
||||
buttons.push({ text: 'Cancel', style: 'cancel' });
|
||||
Alert.alert('Audio Output', 'Select audio device:', buttons);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback for Android (and iOS if no devices found)
|
||||
// Show simple Speaker/Earpiece toggle using setAudioOutput()
|
||||
Alert.alert(
|
||||
'Audio Output',
|
||||
'Select audio output:',
|
||||
[
|
||||
{
|
||||
text: '🔊 Speaker',
|
||||
onPress: () => setAudioOutput(true),
|
||||
},
|
||||
{
|
||||
text: '📱 Earpiece',
|
||||
onPress: () => setAudioOutput(false),
|
||||
},
|
||||
{ text: 'Cancel', style: 'cancel' },
|
||||
]
|
||||
);
|
||||
}, []);
|
||||
|
||||
// Handle voice transcript entries - add to chat in real-time
|
||||
const handleVoiceTranscript = useCallback((role: 'user' | 'assistant', text: string) => {
|
||||
if (!text.trim()) return;
|
||||
|
||||
// Create voice message and add to chat immediately
|
||||
const voiceMessage: Message = {
|
||||
id: `voice-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
role,
|
||||
content: text.trim(),
|
||||
timestamp: new Date(),
|
||||
isVoice: true,
|
||||
};
|
||||
|
||||
setMessages(prev => [...prev, voiceMessage]);
|
||||
|
||||
// Scroll to latest message (respects sort mode)
|
||||
setTimeout(() => {
|
||||
scrollToLatestMessage(true);
|
||||
}, 100);
|
||||
|
||||
// Also store in transcript context for persistence
|
||||
addTranscriptEntry(role, text);
|
||||
}, [hasShownVoiceSeparator, addTranscriptEntry, scrollToLatestMessage]);
|
||||
|
||||
// Cached API token for WellNuo
|
||||
const apiTokenRef = useRef<string | null>(null);
|
||||
|
||||
@ -765,13 +383,17 @@ export default function ChatScreen() {
|
||||
const data = await response.json();
|
||||
|
||||
if (data.ok && data.response?.body) {
|
||||
const responseText = data.response.body;
|
||||
const assistantMessage: Message = {
|
||||
id: (Date.now() + 1).toString(),
|
||||
role: 'assistant',
|
||||
content: data.response.body,
|
||||
content: responseText,
|
||||
timestamp: new Date(),
|
||||
};
|
||||
setMessages(prev => [...prev, assistantMessage]);
|
||||
|
||||
// Speak the response using TTS
|
||||
speak(responseText);
|
||||
} else {
|
||||
// Token might be expired, clear and retry once
|
||||
if (data.status === '401 Unauthorized') {
|
||||
@ -791,7 +413,7 @@ export default function ChatScreen() {
|
||||
} finally {
|
||||
setIsSending(false);
|
||||
}
|
||||
}, [isSending, getWellNuoToken, customDeploymentId, currentBeneficiary, beneficiaries]);
|
||||
}, [isSending, getWellNuoToken, customDeploymentId, currentBeneficiary, beneficiaries, speak]);
|
||||
|
||||
// Render message bubble
|
||||
const renderMessage = ({ item }: { item: Message }) => {
|
||||
@ -799,7 +421,7 @@ export default function ChatScreen() {
|
||||
const isVoice = item.isVoice;
|
||||
const isSystem = item.isSystem;
|
||||
|
||||
// System messages (like "Voice Call Transcript" separator)
|
||||
// System messages
|
||||
if (isSystem) {
|
||||
return (
|
||||
<View style={styles.systemMessageContainer}>
|
||||
@ -820,12 +442,7 @@ export default function ChatScreen() {
|
||||
<Text style={styles.avatarText}>J</Text>
|
||||
</View>
|
||||
)}
|
||||
<View style={[styles.messageBubble, isUser ? styles.userBubble : styles.assistantBubble, isVoice && styles.voiceBubble]}>
|
||||
{isVoice && (
|
||||
<View style={styles.voiceIndicator}>
|
||||
<Text style={styles.voiceIndicatorEmoji}>🎤</Text>
|
||||
</View>
|
||||
)}
|
||||
<View style={[styles.messageBubble, isUser ? styles.userBubble : styles.assistantBubble]}>
|
||||
<Text style={[styles.messageText, isUser ? styles.userMessageText : styles.assistantMessageText]}>
|
||||
{item.content}
|
||||
</Text>
|
||||
@ -855,6 +472,15 @@ export default function ChatScreen() {
|
||||
</Text>
|
||||
</View>
|
||||
</View>
|
||||
{/* TTS Stop button - only visible when speaking */}
|
||||
{isSpeaking && (
|
||||
<TouchableOpacity
|
||||
style={[styles.headerButton, styles.speakingButton]}
|
||||
onPress={stopTTS}
|
||||
>
|
||||
<Ionicons name="volume-high" size={22} color={AppColors.primary} />
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
<TouchableOpacity
|
||||
style={styles.headerButton}
|
||||
onPress={() => setSortNewestFirst(prev => !prev)}
|
||||
@ -868,6 +494,7 @@ export default function ChatScreen() {
|
||||
<TouchableOpacity
|
||||
style={styles.headerButton}
|
||||
onPress={() => {
|
||||
stopTTS(); // Stop TTS when clearing chat
|
||||
Alert.alert(
|
||||
'Clear Chat',
|
||||
'Are you sure you want to clear all messages?',
|
||||
@ -881,7 +508,7 @@ export default function ChatScreen() {
|
||||
{
|
||||
id: '1',
|
||||
role: 'assistant',
|
||||
content: 'Hello! I\'m Julia, your AI wellness assistant. You can type a message or tap the phone button to start a voice call.',
|
||||
content: 'Hello! I\'m Julia, your AI wellness assistant. Type a message below to chat with me.',
|
||||
timestamp: new Date(),
|
||||
},
|
||||
]);
|
||||
@ -951,53 +578,6 @@ export default function ChatScreen() {
|
||||
</View>
|
||||
</Modal>
|
||||
|
||||
{/* Debug Logs Modal */}
|
||||
<Modal
|
||||
visible={showDebugPanel}
|
||||
transparent
|
||||
animationType="slide"
|
||||
onRequestClose={() => setShowDebugPanel(false)}
|
||||
>
|
||||
<View style={styles.modalOverlay}>
|
||||
<View style={[styles.modalContent, styles.debugModalContent]}>
|
||||
<View style={styles.modalHeader}>
|
||||
<Text style={styles.modalTitle}>Debug Logs ({debugLogs.length})</Text>
|
||||
<View style={styles.debugHeaderButtons}>
|
||||
<TouchableOpacity style={styles.debugHeaderBtn} onPress={copyLogsToClipboard}>
|
||||
<Ionicons name="copy-outline" size={20} color={AppColors.primary} />
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity style={styles.debugHeaderBtn} onPress={clearDebugLogs}>
|
||||
<Ionicons name="trash-outline" size={20} color={AppColors.error} />
|
||||
</TouchableOpacity>
|
||||
<TouchableOpacity onPress={() => setShowDebugPanel(false)}>
|
||||
<Ionicons name="close" size={24} color={AppColors.textPrimary} />
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</View>
|
||||
|
||||
<ScrollView style={styles.debugLogsContainer}>
|
||||
{debugLogs.length === 0 ? (
|
||||
<Text style={styles.debugEmptyText}>No logs yet. Start a voice call to see logs.</Text>
|
||||
) : (
|
||||
debugLogs.map(log => (
|
||||
<View key={log.id} style={styles.debugLogEntry}>
|
||||
<Text style={styles.debugTimestamp}>{log.timestamp}</Text>
|
||||
<Text style={[
|
||||
styles.debugMessage,
|
||||
log.level === 'error' && styles.debugError,
|
||||
log.level === 'warn' && styles.debugWarn,
|
||||
log.level === 'success' && styles.debugSuccess,
|
||||
]}>
|
||||
{log.message}
|
||||
</Text>
|
||||
</View>
|
||||
))
|
||||
)}
|
||||
</ScrollView>
|
||||
</View>
|
||||
</View>
|
||||
</Modal>
|
||||
|
||||
{/* Messages */}
|
||||
<KeyboardAvoidingView
|
||||
style={styles.chatContainer}
|
||||
@ -1029,47 +609,6 @@ export default function ChatScreen() {
|
||||
|
||||
{/* Input */}
|
||||
<View style={styles.inputContainer}>
|
||||
{/* Voice Call Button - becomes pulsing bubble during call */}
|
||||
<Animated.View style={{ transform: [{ scale: pulseAnim }] }}>
|
||||
<TouchableOpacity
|
||||
style={[
|
||||
styles.voiceButton,
|
||||
isConnectingVoice && styles.voiceButtonConnecting,
|
||||
isCallActive && styles.voiceButtonActive,
|
||||
]}
|
||||
onPress={isCallActive ? endVoiceCall : startVoiceCall}
|
||||
disabled={isConnectingVoice}
|
||||
>
|
||||
{isConnectingVoice ? (
|
||||
<ActivityIndicator size="small" color={AppColors.primary} />
|
||||
) : isCallActive ? (
|
||||
<View style={styles.callActiveIndicator}>
|
||||
<Ionicons name="call" size={20} color={AppColors.white} />
|
||||
</View>
|
||||
) : (
|
||||
<Ionicons name="call" size={20} color={AppColors.primary} />
|
||||
)}
|
||||
</TouchableOpacity>
|
||||
</Animated.View>
|
||||
{/* Call duration badge */}
|
||||
{isCallActive && (
|
||||
<View style={styles.callDurationBadge}>
|
||||
<Text style={styles.callDurationText}>
|
||||
{Math.floor(callState.callDuration / 60).toString().padStart(2, '0')}:
|
||||
{(callState.callDuration % 60).toString().padStart(2, '0')}
|
||||
</Text>
|
||||
</View>
|
||||
)}
|
||||
{/* Audio output button - only during active call */}
|
||||
{isCallActive && (
|
||||
<TouchableOpacity
|
||||
style={styles.audioButton}
|
||||
onPress={showAudioPicker}
|
||||
>
|
||||
<Ionicons name="volume-high" size={20} color={AppColors.primary} />
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
|
||||
<TextInput
|
||||
style={styles.input}
|
||||
placeholder="Type a message..."
|
||||
@ -1093,38 +632,6 @@ export default function ChatScreen() {
|
||||
</TouchableOpacity>
|
||||
</View>
|
||||
</KeyboardAvoidingView>
|
||||
|
||||
{/* Invisible LiveKit Room - runs in background during call */}
|
||||
{isCallActive && callState.token && callState.wsUrl && (
|
||||
<LiveKitRoom
|
||||
serverUrl={callState.wsUrl}
|
||||
token={callState.token}
|
||||
connect={true}
|
||||
audio={true}
|
||||
video={false}
|
||||
onConnected={() => {
|
||||
console.log('[Chat] LiveKit connected');
|
||||
addDebugLog('LiveKitRoom: CONNECTED to server!', 'success');
|
||||
}}
|
||||
onDisconnected={() => {
|
||||
addDebugLog('LiveKitRoom: DISCONNECTED', 'warn');
|
||||
endVoiceCall();
|
||||
}}
|
||||
onError={(error) => {
|
||||
const errorMsg = error?.message || 'Unknown error';
|
||||
addDebugLog(`LiveKitRoom ERROR: ${errorMsg}`, 'error');
|
||||
console.error('[Chat] LiveKit error:', error);
|
||||
Alert.alert('Voice Call Error', error.message);
|
||||
endVoiceCall();
|
||||
}}
|
||||
>
|
||||
<VoiceCallTranscriptHandler
|
||||
onTranscript={handleVoiceTranscript}
|
||||
onDurationUpdate={updateDuration}
|
||||
onLog={addDebugLog}
|
||||
/>
|
||||
</LiveKitRoom>
|
||||
)}
|
||||
</SafeAreaView>
|
||||
);
|
||||
}
|
||||
@ -1180,6 +687,10 @@ const styles = StyleSheet.create({
|
||||
padding: Spacing.xs,
|
||||
marginLeft: Spacing.sm,
|
||||
},
|
||||
speakingButton: {
|
||||
backgroundColor: AppColors.primaryLight || '#E3F2FD',
|
||||
borderRadius: BorderRadius.full,
|
||||
},
|
||||
chatContainer: {
|
||||
flex: 1,
|
||||
},
|
||||
@ -1263,59 +774,6 @@ const styles = StyleSheet.create({
|
||||
maxHeight: 100,
|
||||
marginRight: Spacing.sm,
|
||||
},
|
||||
voiceButton: {
|
||||
width: 44,
|
||||
height: 44,
|
||||
borderRadius: BorderRadius.full,
|
||||
backgroundColor: AppColors.surface,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
marginRight: Spacing.sm,
|
||||
borderWidth: 1,
|
||||
borderColor: AppColors.primary,
|
||||
},
|
||||
voiceButtonConnecting: {
|
||||
borderColor: AppColors.success,
|
||||
backgroundColor: 'rgba(90, 200, 168, 0.1)',
|
||||
},
|
||||
voiceButtonActive: {
|
||||
backgroundColor: AppColors.error,
|
||||
borderColor: AppColors.error,
|
||||
},
|
||||
audioButton: {
|
||||
width: 44,
|
||||
height: 44,
|
||||
borderRadius: 22,
|
||||
backgroundColor: AppColors.surface,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
marginRight: Spacing.sm,
|
||||
borderWidth: 1,
|
||||
borderColor: AppColors.primary,
|
||||
},
|
||||
callActiveIndicator: {
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
},
|
||||
callDurationBadge: {
|
||||
position: 'absolute',
|
||||
left: 32,
|
||||
top: -8,
|
||||
backgroundColor: AppColors.error,
|
||||
paddingHorizontal: 6,
|
||||
paddingVertical: 2,
|
||||
borderRadius: 8,
|
||||
minWidth: 42,
|
||||
alignItems: 'center',
|
||||
},
|
||||
callDurationText: {
|
||||
fontSize: 10,
|
||||
fontWeight: '600',
|
||||
color: AppColors.white,
|
||||
fontVariant: ['tabular-nums'],
|
||||
},
|
||||
sendButton: {
|
||||
width: 44,
|
||||
height: 44,
|
||||
@ -1437,19 +895,6 @@ const styles = StyleSheet.create({
|
||||
fontWeight: '500',
|
||||
color: AppColors.textPrimary,
|
||||
},
|
||||
// Voice message styles
|
||||
voiceBubble: {
|
||||
borderWidth: 1,
|
||||
borderColor: 'rgba(59, 130, 246, 0.3)',
|
||||
},
|
||||
voiceIndicator: {
|
||||
position: 'absolute',
|
||||
top: 6,
|
||||
right: 6,
|
||||
},
|
||||
voiceIndicatorEmoji: {
|
||||
fontSize: 10,
|
||||
},
|
||||
// System message styles
|
||||
systemMessageContainer: {
|
||||
flexDirection: 'row',
|
||||
@ -1476,59 +921,4 @@ const styles = StyleSheet.create({
|
||||
color: AppColors.textMuted,
|
||||
marginLeft: 4,
|
||||
},
|
||||
// Debug panel styles
|
||||
debugButtonActive: {
|
||||
backgroundColor: 'rgba(59, 130, 246, 0.1)',
|
||||
},
|
||||
debugModalContent: {
|
||||
maxHeight: '80%',
|
||||
},
|
||||
debugHeaderButtons: {
|
||||
flexDirection: 'row',
|
||||
alignItems: 'center',
|
||||
gap: Spacing.md,
|
||||
},
|
||||
debugHeaderBtn: {
|
||||
padding: Spacing.xs,
|
||||
},
|
||||
debugLogsContainer: {
|
||||
flex: 1,
|
||||
padding: Spacing.sm,
|
||||
backgroundColor: '#1a1a2e',
|
||||
},
|
||||
debugEmptyText: {
|
||||
color: AppColors.textMuted,
|
||||
textAlign: 'center',
|
||||
padding: Spacing.lg,
|
||||
fontSize: FontSizes.sm,
|
||||
},
|
||||
debugLogEntry: {
|
||||
flexDirection: 'row',
|
||||
paddingVertical: 3,
|
||||
borderBottomWidth: 1,
|
||||
borderBottomColor: 'rgba(255,255,255,0.05)',
|
||||
},
|
||||
debugTimestamp: {
|
||||
color: '#6b7280',
|
||||
fontSize: 11,
|
||||
fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
|
||||
marginRight: Spacing.sm,
|
||||
minWidth: 90,
|
||||
},
|
||||
debugMessage: {
|
||||
color: '#e5e7eb',
|
||||
fontSize: 11,
|
||||
fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
|
||||
flex: 1,
|
||||
flexWrap: 'wrap',
|
||||
},
|
||||
debugError: {
|
||||
color: '#ef4444',
|
||||
},
|
||||
debugWarn: {
|
||||
color: '#f59e0b',
|
||||
},
|
||||
debugSuccess: {
|
||||
color: '#10b981',
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,6 +1,3 @@
// WebRTC globals are now registered in useLiveKitRoom hook
// before any LiveKit classes are loaded.

import { useEffect } from 'react';
import { DarkTheme, DefaultTheme, ThemeProvider } from '@react-navigation/native';
import { Stack, router, useSegments } from 'expo-router';
@@ -15,6 +12,7 @@ import { AuthProvider, useAuth } from '@/contexts/AuthContext';
import { BeneficiaryProvider } from '@/contexts/BeneficiaryContext';
import { VoiceTranscriptProvider } from '@/contexts/VoiceTranscriptContext';
import { VoiceCallProvider } from '@/contexts/VoiceCallContext';
import { VoiceProvider } from '@/contexts/VoiceContext';
import { LoadingSpinner } from '@/components/ui/LoadingSpinner';
import { FloatingCallBubble } from '@/components/FloatingCallBubble';

@@ -70,7 +68,9 @@ export default function RootLayout() {
        <BeneficiaryProvider>
          <VoiceTranscriptProvider>
            <VoiceCallProvider>
              <VoiceProvider>
                <RootLayoutNav />
              </VoiceProvider>
            </VoiceCallProvider>
          </VoiceTranscriptProvider>
        </BeneficiaryProvider>
components/VoiceFAB.tsx (new file, 222 lines)

@@ -0,0 +1,222 @@
/**
|
||||
* Voice Floating Action Button Component
|
||||
*
|
||||
* A floating action button for toggling voice listening mode.
|
||||
* Tap to start/stop listening.
|
||||
* Hidden when a call is already active.
|
||||
*/
|
||||
|
||||
import React, { useRef, useEffect } from 'react';
|
||||
import {
|
||||
StyleSheet,
|
||||
TouchableOpacity,
|
||||
Animated,
|
||||
ViewStyle,
|
||||
} from 'react-native';
|
||||
import { Ionicons } from '@expo/vector-icons';
|
||||
import { useSafeAreaInsets } from 'react-native-safe-area-context';
|
||||
import * as Haptics from 'expo-haptics';
|
||||
import { AppColors, BorderRadius } from '@/constants/theme';
|
||||
import { useVoiceCall } from '@/contexts/VoiceCallContext';
|
||||
|
||||
interface VoiceFABProps {
|
||||
onPress: () => void;
|
||||
style?: ViewStyle;
|
||||
disabled?: boolean;
|
||||
isListening?: boolean;
|
||||
}
|
||||
|
||||
const FAB_SIZE = 56;
|
||||
|
||||
export function VoiceFAB({ onPress, style, disabled = false, isListening = false }: VoiceFABProps) {
|
||||
const { isCallActive } = useVoiceCall();
|
||||
const insets = useSafeAreaInsets();
|
||||
|
||||
// Animation values
|
||||
const scale = useRef(new Animated.Value(1)).current;
|
||||
const opacity = useRef(new Animated.Value(1)).current;
|
||||
const pulseScale = useRef(new Animated.Value(1)).current;
|
||||
const pulseOpacity = useRef(new Animated.Value(0)).current;
|
||||
|
||||
// Hide FAB when call is active
|
||||
useEffect(() => {
|
||||
if (isCallActive) {
|
||||
Animated.parallel([
|
||||
Animated.timing(scale, {
|
||||
toValue: 0,
|
||||
duration: 200,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
Animated.timing(opacity, {
|
||||
toValue: 0,
|
||||
duration: 200,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
]).start();
|
||||
} else {
|
||||
Animated.parallel([
|
||||
Animated.spring(scale, {
|
||||
toValue: 1,
|
||||
friction: 5,
|
||||
tension: 40,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
Animated.timing(opacity, {
|
||||
toValue: 1,
|
||||
duration: 200,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
]).start();
|
||||
}
|
||||
}, [isCallActive, scale, opacity]);
|
||||
|
||||
// Pulse animation when listening
|
||||
useEffect(() => {
|
||||
if (isListening && !isCallActive) {
|
||||
// Start pulsing animation
|
||||
const pulseAnimation = Animated.loop(
|
||||
Animated.sequence([
|
||||
Animated.parallel([
|
||||
Animated.timing(pulseScale, {
|
||||
toValue: 1.8,
|
||||
duration: 1000,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
Animated.timing(pulseOpacity, {
|
||||
toValue: 0,
|
||||
duration: 1000,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
]),
|
||||
Animated.parallel([
|
||||
Animated.timing(pulseScale, {
|
||||
toValue: 1,
|
||||
duration: 0,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
Animated.timing(pulseOpacity, {
|
||||
toValue: 0.6,
|
||||
duration: 0,
|
||||
useNativeDriver: true,
|
||||
}),
|
||||
]),
|
||||
])
|
||||
);
|
||||
pulseAnimation.start();
|
||||
|
||||
return () => {
|
||||
pulseAnimation.stop();
|
||||
pulseScale.setValue(1);
|
||||
pulseOpacity.setValue(0);
|
||||
};
|
||||
} else {
|
||||
pulseScale.setValue(1);
|
||||
pulseOpacity.setValue(0);
|
||||
}
|
||||
}, [isListening, isCallActive, pulseScale, pulseOpacity]);
|
||||
|
||||
// Press animation with haptic feedback
|
||||
const handlePressIn = () => {
|
||||
Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
|
||||
Animated.spring(scale, {
|
||||
toValue: 0.9,
|
||||
friction: 5,
|
||||
useNativeDriver: true,
|
||||
}).start();
|
||||
};
|
||||
|
||||
const handlePressOut = () => {
|
||||
Animated.spring(scale, {
|
||||
toValue: 1,
|
||||
friction: 5,
|
||||
useNativeDriver: true,
|
||||
}).start();
|
||||
};
|
||||
|
||||
// Don't render if call is active
|
||||
if (isCallActive) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<Animated.View
|
||||
style={[
|
||||
styles.container,
|
||||
{
|
||||
bottom: insets.bottom + 80, // Above tab bar
|
||||
transform: [{ scale }],
|
||||
opacity,
|
||||
},
|
||||
style,
|
||||
]}
|
||||
>
|
||||
{/* Pulse ring when listening */}
|
||||
{isListening && (
|
||||
<Animated.View
|
||||
style={[
|
||||
styles.pulseRing,
|
||||
{
|
||||
transform: [{ scale: pulseScale }],
|
||||
opacity: pulseOpacity,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
)}
|
||||
<TouchableOpacity
|
||||
style={[
|
||||
styles.fab,
|
||||
isListening && styles.fabListening,
|
||||
disabled && styles.fabDisabled,
|
||||
]}
|
||||
onPress={onPress}
|
||||
onPressIn={handlePressIn}
|
||||
onPressOut={handlePressOut}
|
||||
disabled={disabled}
|
||||
activeOpacity={0.9}
|
||||
>
|
||||
<Ionicons
|
||||
name={isListening ? 'mic' : 'mic-outline'}
|
||||
size={28}
|
||||
color={disabled ? AppColors.textMuted : AppColors.white}
|
||||
/>
|
||||
</TouchableOpacity>
|
||||
</Animated.View>
|
||||
);
|
||||
}
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
container: {
|
||||
position: 'absolute',
|
||||
left: 0,
|
||||
right: 0,
|
||||
alignItems: 'center',
|
||||
zIndex: 100,
|
||||
},
|
||||
pulseRing: {
|
||||
position: 'absolute',
|
||||
width: FAB_SIZE,
|
||||
height: FAB_SIZE,
|
||||
borderRadius: BorderRadius.full,
|
||||
backgroundColor: AppColors.error,
|
||||
},
|
||||
fab: {
|
||||
width: FAB_SIZE,
|
||||
height: FAB_SIZE,
|
||||
borderRadius: BorderRadius.full,
|
||||
backgroundColor: AppColors.success,
|
||||
justifyContent: 'center',
|
||||
alignItems: 'center',
|
||||
shadowColor: '#000',
|
||||
shadowOffset: { width: 0, height: 4 },
|
||||
shadowOpacity: 0.3,
|
||||
shadowRadius: 8,
|
||||
elevation: 8,
|
||||
},
|
||||
fabListening: {
|
||||
backgroundColor: AppColors.error,
|
||||
},
|
||||
fabDisabled: {
|
||||
backgroundColor: AppColors.surface,
|
||||
shadowOpacity: 0.1,
|
||||
},
|
||||
});
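// Usage (as wired up in the tabs layout above):
//   <VoiceFAB onPress={handleVoiceFABPress} isListening={isListening} />
// The FAB hides itself while a call is active and shows a pulsing ring while listening.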
|
||||
@@ -12,7 +12,7 @@ interface VoiceCallState {
  isActive: boolean;
  // Whether the call UI is minimized (showing bubble instead of full screen)
  isMinimized: boolean;
  // LiveKit connection details
  // Voice service connection details
  token: string | undefined;
  wsUrl: string | undefined;
  // Call metadata

contexts/VoiceContext.tsx (new file, 410 lines)

@@ -0,0 +1,410 @@
|
||||
/**
|
||||
* Voice Context - Local STT/TTS integration with WellNuo API
|
||||
*
|
||||
* Provides voice session management:
|
||||
* - STT (Speech-to-Text) via expo-speech-recognition
|
||||
* - API calls to WellNuo ask_wellnuo_ai
|
||||
* - TTS (Text-to-Speech) via expo-speech
|
||||
*
|
||||
* Flow: User speaks → STT → API → Response → TTS → Continue listening
|
||||
*/
|
||||
|
||||
import React, {
|
||||
createContext,
|
||||
useContext,
|
||||
useState,
|
||||
useCallback,
|
||||
useRef,
|
||||
ReactNode,
|
||||
} from 'react';
|
||||
import * as Speech from 'expo-speech';
|
||||
import { api } from '@/services/api';
|
||||
import { useVoiceTranscript } from './VoiceTranscriptContext';
|
||||
|
||||
// WellNuo API configuration (same as chat.tsx)
|
||||
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
|
||||
const WELLNUO_USER = 'anandk';
|
||||
const WELLNUO_PASSWORD = 'anandk_8';
|
||||
|
||||
// Single deployment mode - sends only deployment_id (no beneficiary_names_dict)
|
||||
const SINGLE_DEPLOYMENT_MODE = true;
|
||||
|
||||
// Keywords for question normalization (same as chat.tsx)
|
||||
const STATUS_KEYWORDS = [
|
||||
/\bhow\s+is\b/i,
|
||||
/\bhow'?s\b/i,
|
||||
/\bhow\s+are\b/i,
|
||||
/\btell\s+me\s+about\b/i,
|
||||
/\bwhat'?s\s+up\s+with\b/i,
|
||||
/\bupdate\s+on\b/i,
|
||||
/\bstatus\b/i,
|
||||
/\bdoing\b/i,
|
||||
/\bfeeling\b/i,
|
||||
/\bcheck\s+on\b/i,
|
||||
/\bis\s+\w+\s+okay\b/i,
|
||||
/\bis\s+\w+\s+alright\b/i,
|
||||
/\bis\s+\w+\s+fine\b/i,
|
||||
/\bokay\?\b/i,
|
||||
/\balright\?\b/i,
|
||||
];
|
||||
|
||||
const SUBJECT_KEYWORDS = [
|
||||
/\bdad\b/i,
|
||||
/\bfather\b/i,
|
||||
/\bferdinand\b/i,
|
||||
/\bhim\b/i,
|
||||
/\bhe\b/i,
|
||||
/\bmy\s+dad\b/i,
|
||||
/\bmy\s+father\b/i,
|
||||
/\bthe\s+patient\b/i,
|
||||
/\bloved\s+one\b/i,
|
||||
/\bparent\b/i,
|
||||
/\bgrandpa\b/i,
|
||||
/\bgrandfather\b/i,
|
||||
];
|
||||
|
||||
/**
|
||||
* Normalize question for WellNuo API (same logic as chat.tsx)
|
||||
*/
|
||||
function normalizeQuestion(userMessage: string): string {
|
||||
const msgLower = userMessage.toLowerCase().trim();
|
||||
|
||||
const isStatusQuery = STATUS_KEYWORDS.some((pattern) => pattern.test(msgLower));
|
||||
const isAboutRecipient = SUBJECT_KEYWORDS.some((pattern) => pattern.test(msgLower));
|
||||
|
||||
if (isStatusQuery && isAboutRecipient) {
|
||||
console.log(`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing'`);
|
||||
return 'how is dad doing';
|
||||
}
|
||||
|
||||
if (isStatusQuery && !isAboutRecipient) {
|
||||
console.log(
|
||||
`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing' (assumed recipient)`
|
||||
);
|
||||
return 'how is dad doing';
|
||||
}
|
||||
|
||||
console.log(`[VoiceContext] No normalization applied to: '${userMessage}'`);
|
||||
return userMessage;
|
||||
}
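// Example behaviour (illustrative inputs, checked against the keyword patterns above):
//   normalizeQuestion('How is my dad feeling today?') -> 'how is dad doing'   (status + subject)
//   normalizeQuestion("What's the status?")           -> 'how is dad doing'   (status only, recipient assumed)
//   normalizeQuestion('Turn the lights on')           -> 'Turn the lights on' (passed through unchanged)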
|
||||
|
||||
export type VoiceStatus = 'idle' | 'listening' | 'processing' | 'speaking';
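// Typical status transitions for a voice session (descriptive sketch mirroring the flow
// documented in the header comment; the actual transitions are driven by the tabs layout):
//   idle       -> listening   startSession()
//   listening  -> processing  sendTranscript() fires the WellNuo API call
//   processing -> speaking    response arrives and TTS starts
//   speaking   -> listening   TTS finishes (or is interrupted) while the session stays active
//   any state  -> idle        stopSession()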
|
||||
|
||||
interface VoiceContextValue {
|
||||
// Current status of the voice session
|
||||
status: VoiceStatus;
|
||||
// Whether voice session is active (not idle)
|
||||
isActive: boolean;
|
||||
// Whether STT is currently listening
|
||||
isListening: boolean;
|
||||
// Whether TTS is currently speaking
|
||||
isSpeaking: boolean;
|
||||
// Whether processing API request
|
||||
isProcessing: boolean;
|
||||
// Current/last transcript from STT
|
||||
transcript: string;
|
||||
// Partial transcript (real-time preview)
|
||||
partialTranscript: string;
|
||||
// Last API response
|
||||
lastResponse: string | null;
|
||||
// Error message if any
|
||||
error: string | null;
|
||||
|
||||
// Start voice session (begin listening)
|
||||
startSession: () => void;
|
||||
// Stop voice session
|
||||
stopSession: () => void;
|
||||
|
||||
// Send transcript to API and get response with TTS
|
||||
// Called automatically when STT detects speech end, or manually
|
||||
sendTranscript: (text: string) => Promise<string | null>;
|
||||
|
||||
// Update transcript from external STT hook
|
||||
setTranscript: (text: string) => void;
|
||||
setPartialTranscript: (text: string) => void;
|
||||
|
||||
// Set status from external STT/TTS hooks
|
||||
setStatus: (status: VoiceStatus) => void;
|
||||
setIsListening: (listening: boolean) => void;
|
||||
setIsSpeaking: (speaking: boolean) => void;
|
||||
|
||||
// Speak text using TTS
|
||||
speak: (text: string) => Promise<void>;
|
||||
// Stop TTS
|
||||
stopSpeaking: () => void;
|
||||
// Interrupt TTS if speaking (call when user starts talking)
|
||||
interruptIfSpeaking: () => boolean;
|
||||
}
|
||||
|
||||
const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
|
||||
|
||||
export function VoiceProvider({ children }: { children: ReactNode }) {
|
||||
const [status, setStatus] = useState<VoiceStatus>('idle');
|
||||
const [transcript, setTranscript] = useState('');
|
||||
const [partialTranscript, setPartialTranscript] = useState('');
|
||||
const [lastResponse, setLastResponse] = useState<string | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [isListening, setIsListening] = useState(false);
|
||||
const [isSpeaking, setIsSpeaking] = useState(false);
|
||||
|
||||
// Voice transcript context for chat display
|
||||
const { addTranscriptEntry } = useVoiceTranscript();
|
||||
|
||||
// API token cache
|
||||
const apiTokenRef = useRef<string | null>(null);
|
||||
|
||||
// Deployment ID from settings
|
||||
const deploymentIdRef = useRef<string | null>(null);
|
||||
|
||||
// Load deployment ID on mount
|
||||
React.useEffect(() => {
|
||||
const loadDeploymentId = async () => {
|
||||
const savedId = await api.getDeploymentId();
|
||||
deploymentIdRef.current = savedId;
|
||||
console.log('[VoiceContext] Loaded deployment ID:', savedId);
|
||||
};
|
||||
loadDeploymentId();
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Get WellNuo API token (same as chat.tsx)
|
||||
*/
|
||||
const getWellNuoToken = useCallback(async (): Promise<string> => {
|
||||
if (apiTokenRef.current) {
|
||||
return apiTokenRef.current;
|
||||
}
|
||||
|
||||
const nonce = Math.floor(Math.random() * 1000000).toString();
|
||||
const response = await fetch(API_URL, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||
body: new URLSearchParams({
|
||||
function: 'credentials',
|
||||
clientId: 'MA_001',
|
||||
user_name: WELLNUO_USER,
|
||||
ps: WELLNUO_PASSWORD,
|
||||
nonce: nonce,
|
||||
}).toString(),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
if (data.status === '200 OK' && data.access_token) {
|
||||
apiTokenRef.current = data.access_token;
|
||||
console.log('[VoiceContext] WellNuo token obtained');
|
||||
return data.access_token;
|
||||
}
|
||||
throw new Error('Failed to authenticate with WellNuo API');
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Send transcript to WellNuo API and speak the response
|
||||
*/
|
||||
const sendTranscript = useCallback(
|
||||
async (text: string): Promise<string | null> => {
|
||||
const trimmedText = text.trim();
|
||||
if (!trimmedText) {
|
||||
console.log('[VoiceContext] Empty transcript, skipping API call');
|
||||
return null;
|
||||
}
|
||||
|
||||
console.log('[VoiceContext] Sending transcript to API:', trimmedText);
|
||||
setStatus('processing');
|
||||
setError(null);
|
||||
|
||||
// Add user message to transcript for chat display
|
||||
addTranscriptEntry('user', trimmedText);
|
||||
|
||||
try {
|
||||
// Get API token
|
||||
const token = await getWellNuoToken();
|
||||
|
||||
// Normalize question
|
||||
const normalizedQuestion = normalizeQuestion(trimmedText);
|
||||
|
||||
// Get deployment ID
|
||||
const deploymentId = deploymentIdRef.current || '21';
|
||||
|
||||
// Build request params
|
||||
const requestParams: Record<string, string> = {
|
||||
function: 'ask_wellnuo_ai',
|
||||
clientId: 'MA_001',
|
||||
user_name: WELLNUO_USER,
|
||||
token: token,
|
||||
question: normalizedQuestion,
|
||||
deployment_id: deploymentId,
|
||||
};
|
||||
|
||||
// Only add beneficiary_names_dict if NOT in single deployment mode
|
||||
if (!SINGLE_DEPLOYMENT_MODE) {
|
||||
// For full app, would include beneficiary names dict
|
||||
// Currently single deployment mode only
|
||||
}
|
||||
|
||||
const response = await fetch(API_URL, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||
body: new URLSearchParams(requestParams).toString(),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data.ok && data.response?.body) {
|
||||
const responseText = data.response.body;
|
||||
console.log('[VoiceContext] API response:', responseText.slice(0, 100) + '...');
|
||||
setLastResponse(responseText);
|
||||
|
||||
// Add Julia's response to transcript for chat display
|
||||
addTranscriptEntry('assistant', responseText);
|
||||
|
||||
// Speak the response
|
||||
await speak(responseText);
|
||||
|
||||
return responseText;
|
||||
} else {
|
||||
// Token might be expired
|
||||
if (data.status === '401 Unauthorized') {
|
||||
apiTokenRef.current = null;
|
||||
throw new Error('Session expired, please try again');
|
||||
}
|
||||
throw new Error(data.message || 'Could not get response');
|
||||
}
|
||||
} catch (err) {
|
||||
const errorMsg = err instanceof Error ? err.message : 'Unknown error';
|
||||
console.error('[VoiceContext] API error:', errorMsg);
|
||||
setError(errorMsg);
|
||||
setStatus('idle');
|
||||
return null;
|
||||
}
|
||||
},
|
||||
[getWellNuoToken, addTranscriptEntry]
|
||||
);
|
||||
|
||||
/**
|
||||
* Interrupt TTS when user starts speaking
|
||||
* Call this from the STT hook when voice activity is detected
|
||||
*/
|
||||
const interruptIfSpeaking = useCallback(() => {
|
||||
if (isSpeaking) {
|
||||
console.log('[VoiceContext] User interrupted - stopping TTS');
|
||||
Speech.stop();
|
||||
setIsSpeaking(false);
|
||||
setStatus('listening');
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}, [isSpeaking]);
|
||||
|
||||
/**
|
||||
* Speak text using TTS
|
||||
*/
|
||||
const speak = useCallback(async (text: string): Promise<void> => {
|
||||
if (!text.trim()) return;
|
||||
|
||||
console.log('[VoiceContext] Speaking:', text.slice(0, 50) + '...');
|
||||
setStatus('speaking');
|
||||
setIsSpeaking(true);
|
||||
|
||||
return new Promise((resolve) => {
|
||||
Speech.speak(text, {
|
||||
language: 'en-US',
|
||||
rate: 0.9,
|
||||
pitch: 1.0,
|
||||
onStart: () => {
|
||||
console.log('[VoiceContext] TTS started');
|
||||
},
|
||||
onDone: () => {
|
||||
console.log('[VoiceContext] TTS completed');
|
||||
setIsSpeaking(false);
|
||||
// Return to listening state after speaking (if session is active)
|
||||
setStatus('listening');
|
||||
resolve();
|
||||
},
|
||||
onError: (error) => {
|
||||
console.error('[VoiceContext] TTS error:', error);
|
||||
setIsSpeaking(false);
|
||||
setStatus('listening');
|
||||
resolve();
|
||||
},
|
||||
onStopped: () => {
|
||||
console.log('[VoiceContext] TTS stopped (interrupted)');
|
||||
setIsSpeaking(false);
|
||||
setStatus('listening');
|
||||
resolve();
|
||||
},
|
||||
});
|
||||
});
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Stop TTS playback
|
||||
*/
|
||||
const stopSpeaking = useCallback(() => {
|
||||
Speech.stop();
|
||||
setIsSpeaking(false);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Start voice session
|
||||
*/
|
||||
const startSession = useCallback(() => {
|
||||
console.log('[VoiceContext] Starting voice session');
|
||||
setStatus('listening');
|
||||
setIsListening(true);
|
||||
setError(null);
|
||||
setTranscript('');
|
||||
setPartialTranscript('');
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Stop voice session
|
||||
*/
|
||||
const stopSession = useCallback(() => {
|
||||
console.log('[VoiceContext] Stopping voice session');
|
||||
Speech.stop();
|
||||
setStatus('idle');
|
||||
setIsListening(false);
|
||||
setIsSpeaking(false);
|
||||
setError(null);
|
||||
}, []);
|
||||
|
||||
// Computed values
|
||||
const isActive = status !== 'idle';
|
||||
const isProcessing = status === 'processing';
|
||||
|
||||
return (
|
||||
<VoiceContext.Provider
|
||||
value={{
|
||||
status,
|
||||
isActive,
|
||||
isListening,
|
||||
isSpeaking,
|
||||
isProcessing,
|
||||
transcript,
|
||||
partialTranscript,
|
||||
lastResponse,
|
||||
error,
|
||||
startSession,
|
||||
stopSession,
|
||||
sendTranscript,
|
||||
setTranscript,
|
||||
setPartialTranscript,
|
||||
setStatus,
|
||||
setIsListening,
|
||||
setIsSpeaking,
|
||||
speak,
|
||||
stopSpeaking,
|
||||
interruptIfSpeaking,
|
||||
}}
|
||||
>
|
||||
{children}
|
||||
</VoiceContext.Provider>
|
||||
);
|
||||
}
|
||||
|
||||
export function useVoice() {
|
||||
const context = useContext(VoiceContext);
|
||||
if (!context) {
|
||||
throw new Error('useVoice must be used within VoiceProvider');
|
||||
}
|
||||
return context;
|
||||
}
|
||||
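
Reviewer note: a minimal sketch of how a screen could consume this context. The `VoiceDemo` component and its hard-coded question are illustrative assumptions, not code from this change.

```typescript
import React from 'react';
import { Button, Text, View } from 'react-native';
import { useVoice } from '@/contexts/VoiceContext';

// Hypothetical screen: toggles the session and forwards a question to the API.
export function VoiceDemo() {
  const { status, isActive, startSession, stopSession, sendTranscript } = useVoice();

  const askAboutDad = async () => {
    // The provider normalizes the question, calls the WellNuo API, and speaks the reply.
    await sendTranscript('How is dad doing today?');
  };

  return (
    <View>
      <Text>Status: {status}</Text>
      <Button title={isActive ? 'Stop' : 'Start'} onPress={isActive ? stopSession : startSession} />
      <Button title="Ask about dad" onPress={askAboutDad} />
    </View>
  );
}
```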
@ -1,279 +0,0 @@
# Julia AI Voice Integration

## Architecture Overview

```
┌─────────────────────────────────────────────────────────────────┐
│ WellNuo Lite App (iOS)                                           │
│ ┌─────────────────────────────────────────────────────────┐     │
│ │ Voice Call Screen (app/voice-call.tsx)                   │     │
│ │ - useLiveKitRoom hook                                    │     │
│ │ - Audio session management                               │     │
│ │ - Microphone permission handling                         │     │
│ └───────────────────────┬─────────────────────────────────┘     │
│                         │ WebSocket + WebRTC                     │
└─────────────────────────┼───────────────────────────────────────┘
                          │
                          ▼
┌─────────────────────────────────────────────────────────────────┐
│ LiveKit Cloud                                                    │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐     │
│ │ SFU Server      │ │ Room Mgmt       │ │ Agent Hosting   │     │
│ │ (WebRTC)        │ │ (Token Auth)    │ │ (Python)        │     │
│ └────────┬────────┘ └─────────────────┘ └────────┬────────┘     │
│          │                                       │              │
│          └───────────────────────────────────────┘              │
│                          │ Audio Streams                         │
└──────────────────────────┼──────────────────────────────────────┘
                           │
                           ▼
┌─────────────────────────────────────────────────────────────────┐
│ Julia AI Agent (Python)                                          │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐     │
│ │ Deepgram    │ │ Deepgram    │ │ WellNuo voice_ask API   │     │
│ │ STT         │ │ TTS         │ │ (Custom LLM backend)    │     │
│ │ (Nova-2)    │ │ (Aura)      │ │                         │     │
│ └─────────────┘ └─────────────┘ └─────────────────────────┘     │
└─────────────────────────────────────────────────────────────────┘
```

## Components

### 1. React Native Client

**Location:** `app/voice-call.tsx`, `hooks/useLiveKitRoom.ts`

**Dependencies:**
- `@livekit/react-native` - LiveKit React Native SDK
- `@livekit/react-native-webrtc` - WebRTC for React Native
- `expo-av` - Audio session management

**Key Features:**
- Connects to LiveKit room with JWT token
- Manages audio session (activates speaker mode)
- Handles microphone permissions
- Displays connection state and transcription
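
A minimal sketch of how the screen is expected to consume the hook documented here (the hook's signature appears later in this compare; the surrounding UI is an illustrative assumption):

```typescript
import React from 'react';
import { Button, Text, View } from 'react-native';
import { useLiveKitRoom } from '@/hooks/useLiveKitRoom';

// Illustrative wrapper only - the real screen lives in app/voice-call.tsx.
export function VoiceCallSketch({ userId }: { userId: string }) {
  const { state, error, callDuration, isMuted, connect, disconnect, toggleMute } =
    useLiveKitRoom({ userId, autoConnect: false });

  return (
    <View>
      <Text>{state === 'connected' ? `In call (${callDuration}s)` : state}</Text>
      {error ? <Text>{error}</Text> : null}
      <Button title="Start" onPress={connect} />
      <Button title={isMuted ? 'Unmute' : 'Mute'} onPress={toggleMute} />
      <Button title="End" onPress={disconnect} />
    </View>
  );
}
```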
### 2. LiveKit Cloud

**Project:** `live-kit-demo-70txlh6a`
**Agent ID:** `CA_Yd3qcuYEVKKE`

**Configuration:**
- Auto-scaling agent workers
- Managed STT/TTS through inference endpoints
- Built-in noise cancellation

**Getting Tokens:**
```typescript
// From WellNuo backend
const response = await fetch('/api/livekit/token', {
  method: 'POST',
  body: JSON.stringify({ roomName, userName })
});
const { token, url } = await response.json();
```

### 3. Julia AI Agent (Python)

**Location:** `julia-agent/julia-ai/src/agent.py`

**Stack:**
- LiveKit Agents SDK
- Deepgram Nova-2 (STT)
- Deepgram Aura Asteria (TTS - female voice)
- Silero VAD (Voice Activity Detection)
- Custom WellNuo LLM (voice_ask API)

## Setup & Deployment

### Prerequisites

1. **LiveKit Cloud Account**
   - Sign up at https://cloud.livekit.io/
   - Create a project
   - Get API credentials

2. **LiveKit CLI**
   ```bash
   # macOS
   brew install livekit-cli

   # Login
   lk cloud auth
   ```

### Agent Deployment

1. **Navigate to agent directory:**
   ```bash
   cd julia-agent/julia-ai
   ```

2. **Install dependencies:**
   ```bash
   uv sync
   ```

3. **Configure environment:**
   ```bash
   cp .env.example .env.local
   # Add LIVEKIT_URL, LIVEKIT_API_KEY, LIVEKIT_API_SECRET
   ```

4. **Local development:**
   ```bash
   uv run python src/agent.py dev
   ```

5. **Deploy to LiveKit Cloud:**
   ```bash
   lk agent deploy
   ```

### React Native Setup

1. **Install packages:**
   ```bash
   npm install @livekit/react-native @livekit/react-native-webrtc
   ```

2. **iOS permissions (Info.plist):**
   ```xml
   <key>NSMicrophoneUsageDescription</key>
   <string>WellNuo needs microphone access for voice calls with Julia AI</string>
   ```

3. **Pod install:**
   ```bash
   cd ios && pod install
   ```

## Flow Diagram

```
User opens Voice tab
        │
        ▼
Request microphone permission
        │
        ├─ Denied → Show error
        │
        ▼
Get LiveKit token from WellNuo API
        │
        ▼
Connect to LiveKit room
        │
        ▼
Agent joins automatically (LiveKit Cloud)
        │
        ▼
Agent sends greeting (TTS)
        │
        ▼
User speaks → STT → WellNuo API → Response → TTS
        │
        ▼
User ends call → Disconnect from room
```

## API Integration

### WellNuo voice_ask API

The agent uses WellNuo's `voice_ask` API to get contextual responses about the beneficiary.

**Endpoint:** `https://eluxnetworks.net/function/well-api/api`

**Authentication:**
```python
data = {
    "function": "credentials",
    "clientId": "001",
    "user_name": WELLNUO_USER,
    "ps": WELLNUO_PASSWORD,
    "nonce": str(random.randint(0, 999999)),
}
```

**Voice Ask:**
```python
data = {
    "function": "voice_ask",
    "clientId": "001",
    "user_name": WELLNUO_USER,
    "token": token,
    "question": user_message,
    "deployment_id": DEPLOYMENT_ID,
}
```
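
For comparison with the client-side code earlier in this compare, a hedged TypeScript sketch of issuing the same `voice_ask` call (parameter names come from the payloads above; error handling and credential storage are omitted):

```typescript
const API_URL = 'https://eluxnetworks.net/function/well-api/api';

// Sketch only: assumes `token`, `userMessage`, and `deploymentId` were obtained
// via the `credentials` call shown above.
async function voiceAsk(token: string, userMessage: string, deploymentId: string) {
  const response = await fetch(API_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: new URLSearchParams({
      function: 'voice_ask',
      clientId: '001',
      user_name: 'anandk',
      token,
      question: userMessage,
      deployment_id: deploymentId,
    }).toString(),
  });
  return response.json();
}
```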
## Troubleshooting

### Common Issues

1. **No audio playback on iOS**
   - Check audio session configuration
   - Ensure `expo-av` is properly configured
   - Test on real device (simulator has audio limitations)

2. **Microphone not working**
   - Verify permissions in Info.plist
   - Check if user granted permission
   - Real device required for full audio testing

3. **Agent not responding**
   - Check agent logs: `lk agent logs`
   - Verify LIVEKIT credentials
   - Check WellNuo API connectivity

4. **Connection fails**
   - Verify token is valid
   - Check network connectivity
   - Ensure LiveKit URL is correct

### Debugging

```bash
# View agent logs
lk agent logs

# View specific deployment logs
lk agent logs --version v20260119031418

# Check agent status
lk agent list
```

## Environment Variables

### Agent (.env.local)
```
LIVEKIT_URL=wss://live-kit-demo-70txlh6a.livekit.cloud
LIVEKIT_API_KEY=your-api-key
LIVEKIT_API_SECRET=your-api-secret
WELLNUO_USER=anandk
WELLNUO_PASSWORD=anandk_8
DEPLOYMENT_ID=21
```

### React Native (via WellNuo backend)
Token generation handled server-side for security.

## Status

**Current State:** WIP - Not tested on real device

**Working:**
- Agent deploys to LiveKit Cloud
- Agent connects to rooms
- STT/TTS pipeline configured
- WellNuo API integration
- React Native UI

**Needs Testing:**
- Real device microphone capture
- Audio playback on physical iOS device
- Full conversation loop end-to-end
- Token refresh/expiration handling
@ -1,707 +0,0 @@
/**
 * useLiveKitRoom - Hook for LiveKit voice call with Julia AI
 *
 * IMPORTANT: This hook encapsulates ALL LiveKit logic.
 * The UI component should only use the returned state and actions.
 *
 * LOGGING: Maximum transparency - every step is logged!
 */

import { useState, useCallback, useRef, useEffect } from 'react';
import { Platform, AppState, AppStateStatus, NativeModules } from 'react-native';
import type { Room as RoomType } from 'livekit-client';

// Helper to detect iOS Simulator
// Expo Go and production builds both work with this approach
const isIOSSimulator = (): boolean => {
  if (Platform.OS !== 'ios') return false;
  // Check via DeviceInfo module if available
  const { PlatformConstants } = NativeModules;
  return PlatformConstants?.interfaceIdiom === 'simulator' ||
    PlatformConstants?.isSimulator === true;
};
import { getToken, VOICE_NAME, BeneficiaryData } from '@/services/livekitService';
import {
  configureAudioForVoiceCall,
  stopAudioSession,
  reconfigureAudioForPlayback,
} from '@/utils/audioSession';
import { callManager } from '@/services/callManager';

// Connection states
export type ConnectionState =
  | 'idle'
  | 'initializing'
  | 'configuring_audio'
  | 'requesting_token'
  | 'connecting'
  | 'connected'
  | 'reconnecting'
  | 'disconnecting'
  | 'disconnected'
  | 'error';

// Log entry type
export interface LogEntry {
  timestamp: string; // Formatted time string (HH:MM:SS.mmm)
  level: 'info' | 'warn' | 'error' | 'success';
  message: string;
}

// Hook options
export interface UseLiveKitRoomOptions {
  userId: string;
  beneficiaryData?: BeneficiaryData;
  onTranscript?: (role: 'user' | 'assistant', text: string) => void;
  autoConnect?: boolean;
}

// Hook return type
export interface UseLiveKitRoomReturn {
  // Connection state
  state: ConnectionState;
  error: string | null;

  // Call info
  roomName: string | null;
  callDuration: number;

  // Audio state
  isMuted: boolean;
  isAgentSpeaking: boolean;
  canPlayAudio: boolean;

  // Debug info
  logs: LogEntry[];
  participantCount: number;

  // Actions
  connect: () => Promise<void>;
  disconnect: () => Promise<void>;
  toggleMute: () => Promise<void>;
  clearLogs: () => void;
}

/**
 * Main hook for LiveKit voice calls
 */
export function useLiveKitRoom(options: UseLiveKitRoomOptions): UseLiveKitRoomReturn {
  const { userId, beneficiaryData, onTranscript, autoConnect = false } = options;

  // State
  const [state, setState] = useState<ConnectionState>('idle');
  const [error, setError] = useState<string | null>(null);
  const [roomName, setRoomName] = useState<string | null>(null);
  const [callDuration, setCallDuration] = useState(0);
  const [isMuted, setIsMuted] = useState(false);
  const [isAgentSpeaking, setIsAgentSpeaking] = useState(false);
  const [canPlayAudio, setCanPlayAudio] = useState(false);
  const [logs, setLogs] = useState<LogEntry[]>([]);
  const [participantCount, setParticipantCount] = useState(0);

  // Refs
  const roomRef = useRef<RoomType | null>(null);
  const callStartTimeRef = useRef<number | null>(null);
  const connectionIdRef = useRef(0);
  const isUnmountingRef = useRef(false);
  const appStateRef = useRef<AppStateStatus>(AppState.currentState);
  const callIdRef = useRef<string | null>(null);

  // ===================
  // LOGGING FUNCTIONS
  // ===================

  const log = useCallback((level: LogEntry['level'], message: string) => {
    const now = new Date();
    const timestamp = now.toLocaleTimeString('en-US', {
      hour12: false,
      hour: '2-digit',
      minute: '2-digit',
      second: '2-digit',
    }) + '.' + now.getMilliseconds().toString().padStart(3, '0');

    const entry: LogEntry = {
      timestamp,
      level,
      message,
    };
    setLogs((prev) => [...prev, entry]);

    // Also log to console with color
    const prefix = `[LiveKit ${timestamp}]`;
    switch (level) {
      case 'error':
        console.error(`${prefix} ERROR: ${message}`);
        break;
      case 'warn':
        console.warn(`${prefix} WARN: ${message}`);
        break;
      case 'success':
        console.log(`${prefix} SUCCESS: ${message}`);
        break;
      default:
        console.log(`${prefix} INFO: ${message}`);
    }
  }, []);

  const logInfo = useCallback((msg: string) => log('info', msg), [log]);
  const logWarn = useCallback((msg: string) => log('warn', msg), [log]);
  const logError = useCallback((msg: string) => log('error', msg), [log]);
  const logSuccess = useCallback((msg: string) => log('success', msg), [log]);

  const clearLogs = useCallback(() => {
    setLogs([]);
  }, []);

  // ===================
  // CONNECT FUNCTION
  // ===================

  const connect = useCallback(async () => {
    // Prevent multiple concurrent connection attempts
    const currentConnectionId = ++connectionIdRef.current;

    // Generate unique call ID for this session
    const callId = `call-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
    callIdRef.current = callId;

    logInfo('========== STARTING VOICE CALL ==========');
    logInfo(`User ID: ${userId}`);
    logInfo(`Platform: ${Platform.OS}`);
    logInfo(`Connection ID: ${currentConnectionId}`);
    logInfo(`Call ID: ${callId}`);

    // Register with CallManager - this will disconnect any existing call
    logInfo('Registering call with CallManager...');
    await callManager.registerCall(callId, async () => {
      logInfo('CallManager requested disconnect (another call starting)');
      if (roomRef.current) {
        await roomRef.current.disconnect();
        roomRef.current = null;
      }
      await stopAudioSession();
    });
    logSuccess('Call registered with CallManager');

    // Check if already connected
    if (roomRef.current) {
      logWarn('Already connected to a room, disconnecting first...');
      await roomRef.current.disconnect();
      roomRef.current = null;
    }

    try {
      // ========== STEP 1: Initialize ==========
      setState('initializing');
      logInfo('STEP 1/6: Initializing...');

      // Detect simulator vs real device
      const isSimulator = isIOSSimulator();
      logInfo(`Device type: ${isSimulator ? 'SIMULATOR' : 'REAL DEVICE'}`);
      logInfo(`Device model: ${Platform.OS} ${Platform.Version}`);

      if (isSimulator) {
        logWarn('⚠️ SIMULATOR DETECTED - Microphone will NOT work!');
        logWarn('Simulator can only test: connection, token, agent presence, TTS playback');
        logWarn('For full STT test, use a real iPhone device');
      }

      // Check if connection was cancelled
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled (component unmounting or new connection started)');
        return;
      }

      // ========== STEP 2: Register WebRTC Globals ==========
      logInfo('STEP 2/6: Registering WebRTC globals...');

      const { registerGlobals } = await import('@livekit/react-native');

      if (typeof global.RTCPeerConnection === 'undefined') {
        logInfo('RTCPeerConnection not found, calling registerGlobals()...');
        registerGlobals();
        logSuccess('WebRTC globals registered!');
      } else {
        logInfo('WebRTC globals already registered');
      }

      // Check again
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled after registerGlobals');
        return;
      }

      // ========== STEP 3: Configure iOS Audio ==========
      setState('configuring_audio');
      logInfo('STEP 3/6: Configuring iOS AudioSession...');

      await configureAudioForVoiceCall();
      logSuccess('iOS AudioSession configured!');

      // Check again
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled after audio config');
        await stopAudioSession();
        return;
      }

      // ========== STEP 4: Get Token ==========
      setState('requesting_token');
      logInfo('STEP 4/6: Requesting token from server...');

      const tokenResult = await getToken(userId, beneficiaryData);

      if (!tokenResult.success || !tokenResult.data) {
        const errorMsg = tokenResult.error || 'Failed to get token';
        logError(`Token request failed: ${errorMsg}`);
        setError(errorMsg);
        setState('error');
        return;
      }

      const { token, wsUrl, roomName: room } = tokenResult.data;
      setRoomName(room);

      logSuccess(`Token received!`);
      logInfo(`  Room: ${room}`);
      logInfo(`  WebSocket URL: ${wsUrl}`);
      logInfo(`  Token length: ${token.length} chars`);

      // Check again
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled after token');
        await stopAudioSession();
        return;
      }

      // ========== STEP 5: Import LiveKit and Create Room ==========
      logInfo('STEP 5/6: Creating LiveKit Room...');

      const { Room, RoomEvent, ConnectionState: LKConnectionState, Track } = await import(
        'livekit-client'
      );

      logInfo(`  Room class available: ${typeof Room === 'function'}`);
      logInfo(`  RoomEvent available: ${typeof RoomEvent === 'object'}`);

      const lkRoom = new Room();
      roomRef.current = lkRoom;

      logSuccess('Room instance created!');

      // ========== Setup Event Listeners ==========
      logInfo('Setting up event listeners...');

      // Connection state changes
      lkRoom.on(RoomEvent.ConnectionStateChanged, (newState) => {
        logInfo(`EVENT: ConnectionStateChanged -> ${newState}`);

        switch (newState) {
          case LKConnectionState.Connecting:
            setState('connecting');
            break;
          case LKConnectionState.Connected:
            setState('connected');
            logSuccess('Connected to room!');
            if (!callStartTimeRef.current) {
              callStartTimeRef.current = Date.now();
              logInfo('Call timer started');
            }
            break;
          case LKConnectionState.Reconnecting:
            setState('reconnecting');
            logWarn('Reconnecting...');
            break;
          case LKConnectionState.Disconnected:
            setState('disconnected');
            logInfo('Disconnected from room');
            break;
        }
      });

      // Track subscribed (audio from agent)
      lkRoom.on(RoomEvent.TrackSubscribed, async (track, publication, participant) => {
        logInfo(`EVENT: TrackSubscribed`);
        logInfo(`  Track kind: ${track.kind}`);
        logInfo(`  Track source: ${track.source}`);
        logInfo(`  Participant: ${participant.identity}`);
        logInfo(`  Publication SID: ${publication.trackSid}`);

        if (track.kind === Track.Kind.Audio) {
          logSuccess(`Audio track from ${participant.identity} - should hear voice now!`);
          setIsAgentSpeaking(true);

          // Reconfigure audio for playback
          logInfo('Reconfiguring audio for playback...');
          await reconfigureAudioForPlayback();
        }
      });

      // Track unsubscribed
      lkRoom.on(RoomEvent.TrackUnsubscribed, (track, publication, participant) => {
        logInfo(`EVENT: TrackUnsubscribed`);
        logInfo(`  Track kind: ${track.kind}`);
        logInfo(`  Participant: ${participant.identity}`);

        if (track.kind === Track.Kind.Audio) {
          setIsAgentSpeaking(false);
        }
      });

      // Track muted/unmuted
      lkRoom.on(RoomEvent.TrackMuted, (publication, participant) => {
        logInfo(`EVENT: TrackMuted - ${publication.trackSid} by ${participant.identity}`);
      });

      lkRoom.on(RoomEvent.TrackUnmuted, (publication, participant) => {
        logInfo(`EVENT: TrackUnmuted - ${publication.trackSid} by ${participant.identity}`);
      });

      // Participants
      lkRoom.on(RoomEvent.ParticipantConnected, (participant) => {
        logSuccess(`EVENT: ParticipantConnected - ${participant.identity}`);
        setParticipantCount((c) => c + 1);
      });

      lkRoom.on(RoomEvent.ParticipantDisconnected, (participant) => {
        logInfo(`EVENT: ParticipantDisconnected - ${participant.identity}`);
        setParticipantCount((c) => Math.max(0, c - 1));
      });

      // Active speakers (voice activity)
      lkRoom.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
        if (speakers.length > 0) {
          const speakerNames = speakers.map((s: any) => s.identity).join(', ');
          logInfo(`EVENT: ActiveSpeakersChanged - ${speakerNames}`);

          // Check if agent is speaking
          const agentSpeaking = speakers.some((s: any) => s.identity.startsWith('agent'));
          setIsAgentSpeaking(agentSpeaking);
        }
      });

      // Local track published (our mic)
      lkRoom.on(RoomEvent.LocalTrackPublished, (publication, participant) => {
        logSuccess(`EVENT: LocalTrackPublished`);
        logInfo(`  Track: ${publication.trackSid}`);
        logInfo(`  Kind: ${publication.kind}`);
        logInfo(`  Source: ${publication.source}`);
      });

      // Audio playback status
      lkRoom.on(RoomEvent.AudioPlaybackStatusChanged, () => {
        const canPlay = lkRoom.canPlaybackAudio;
        logInfo(`EVENT: AudioPlaybackStatusChanged - canPlaybackAudio: ${canPlay}`);
        setCanPlayAudio(canPlay);
      });

      // Data received (transcripts)
      lkRoom.on(RoomEvent.DataReceived, (payload, participant) => {
        try {
          const data = JSON.parse(new TextDecoder().decode(payload));
          logInfo(`EVENT: DataReceived from ${participant?.identity || 'unknown'}`);
          logInfo(`  Type: ${data.type}`);

          if (data.type === 'transcript' && onTranscript) {
            logInfo(`  Role: ${data.role}, Text: ${data.text?.substring(0, 50)}...`);
            onTranscript(data.role, data.text);
          }
        } catch (e) {
          // Non-JSON data, ignore
        }
      });

      // Errors
      lkRoom.on(RoomEvent.Disconnected, (reason) => {
        logWarn(`EVENT: Disconnected - Reason: ${reason}`);
      });

      // Check again before connect
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled before room.connect()');
        await stopAudioSession();
        return;
      }

      // ========== STEP 6: Connect to Room ==========
      setState('connecting');
      logInfo('STEP 6/6: Connecting to LiveKit room...');
      logInfo(`  URL: ${wsUrl}`);
      logInfo(`  Room: ${room}`);

      await lkRoom.connect(wsUrl, token, {
        autoSubscribe: true,
      });

      logSuccess('Connected to room!');

      // ========== CRITICAL: Start Audio Playback ==========
      // This is REQUIRED for audio to play on iOS and Android!
      // Without this call, remote audio tracks will NOT be heard.
      logInfo('Starting audio playback (room.startAudio)...');
      try {
        await lkRoom.startAudio();
        logSuccess(`Audio playback started! canPlaybackAudio: ${lkRoom.canPlaybackAudio}`);
        setCanPlayAudio(lkRoom.canPlaybackAudio);
      } catch (audioPlaybackErr: any) {
        logError(`startAudio failed: ${audioPlaybackErr.message}`);
        // Don't fail the whole call - audio might still work on some platforms
      }

      // Check if connection was cancelled after connect
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Connection cancelled after room.connect()');
        await lkRoom.disconnect();
        await stopAudioSession();
        return;
      }

      // ========== Enable Microphone ==========
      logInfo('Enabling microphone...');

      try {
        await lkRoom.localParticipant.setMicrophoneEnabled(true);
        logSuccess('Microphone enabled!');
        logInfo(`  Local participant: ${lkRoom.localParticipant.identity}`);

        // Log track info - CRITICAL for debugging!
        const audioTracks = lkRoom.localParticipant.getTrackPublications();
        logInfo(`  Published tracks: ${audioTracks.length}`);

        let micTrackFound = false;
        audioTracks.forEach((pub) => {
          logInfo(`  - ${pub.kind}: ${pub.trackSid} (${pub.source})`);
          logInfo(`    isMuted: ${pub.isMuted}, isSubscribed: ${pub.isSubscribed}`);

          if (pub.kind === 'audio' && pub.source === 'microphone') {
            micTrackFound = true;
            const track = pub.track;
            if (track) {
              logInfo(`    Track mediaStreamTrack: ${track.mediaStreamTrack?.readyState || 'N/A'}`);
              logInfo(`    Track enabled: ${track.mediaStreamTrack?.enabled || 'N/A'}`);
            } else {
              logWarn(`    WARNING: No track object on publication!`);
            }
          }
        });

        if (!micTrackFound) {
          // Check if simulator
          const isSimulator = isIOSSimulator();
          if (isSimulator) {
            logWarn('No microphone track - EXPECTED on simulator');
            logInfo('Simulator test: check if Agent joined and TTS works');
          } else {
            logError('CRITICAL: No microphone track published! STT will NOT work!');
            logError('Possible causes: permissions denied, AudioSession not configured, hardware issue');
          }
        } else {
          logSuccess('Microphone track found and published - STT should work');
        }

      } catch (micError: any) {
        logError(`Failed to enable microphone: ${micError.message}`);
        logError(`Stack: ${micError.stack || 'N/A'}`);
        // This is CRITICAL - user must know!
        setError(`Microphone error: ${micError.message}`);
      }

      // Set initial participant count
      setParticipantCount(lkRoom.remoteParticipants.size);
      logInfo(`Remote participants: ${lkRoom.remoteParticipants.size}`);

      logSuccess('========== VOICE CALL STARTED ==========');
    } catch (err: any) {
      // Ignore errors if unmounting
      if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
        logWarn('Error ignored (component unmounting)');
        return;
      }

      const errorMsg = err?.message || String(err);
      logError(`Connection failed: ${errorMsg}`);
      logError(`Stack: ${err?.stack || 'N/A'}`);

      setError(errorMsg);
      setState('error');

      // Cleanup
      await stopAudioSession();
    }
  }, [userId, beneficiaryData, onTranscript, logInfo, logWarn, logError, logSuccess]);

  // ===================
  // DISCONNECT FUNCTION
  // ===================

  const disconnect = useCallback(async () => {
    logInfo('========== DISCONNECTING ==========');
    setState('disconnecting');

    // Unregister from CallManager
    if (callIdRef.current) {
      logInfo(`Unregistering call: ${callIdRef.current}`);
      callManager.unregisterCall(callIdRef.current);
      callIdRef.current = null;
    }

    try {
      if (roomRef.current) {
        logInfo('Disconnecting from room...');
        await roomRef.current.disconnect();
        roomRef.current = null;
        logSuccess('Disconnected from room');
      } else {
        logInfo('No room to disconnect from');
      }
    } catch (err: any) {
      logError(`Disconnect error: ${err.message}`);
    }

    logInfo('Stopping audio session...');
    await stopAudioSession();

    // Reset state
    setState('disconnected');
    setRoomName(null);
    setIsMuted(false);
    setIsAgentSpeaking(false);
    setParticipantCount(0);
    callStartTimeRef.current = null;

    logSuccess('========== DISCONNECTED ==========');
  }, [logInfo, logError, logSuccess]);

  // ===================
  // TOGGLE MUTE
  // ===================

  const toggleMute = useCallback(async () => {
    if (!roomRef.current) {
      logWarn('Cannot toggle mute - not connected');
      return;
    }

    const newMuted = !isMuted;
    logInfo(`Toggling mute: ${isMuted} -> ${newMuted}`);

    try {
      await roomRef.current.localParticipant.setMicrophoneEnabled(!newMuted);
      setIsMuted(newMuted);
      logSuccess(`Microphone ${newMuted ? 'muted' : 'unmuted'}`);
    } catch (err: any) {
      logError(`Failed to toggle mute: ${err.message}`);
    }
  }, [isMuted, logInfo, logWarn, logError, logSuccess]);

  // ===================
  // CALL DURATION TIMER
  // ===================

  useEffect(() => {
    if (state !== 'connected') return;

    const interval = setInterval(() => {
      if (callStartTimeRef.current) {
        const elapsed = Math.floor((Date.now() - callStartTimeRef.current) / 1000);
        setCallDuration(elapsed);
      }
    }, 1000);

    return () => clearInterval(interval);
  }, [state]);

  // ===================
  // APP STATE HANDLING
  // ===================

  useEffect(() => {
    const handleAppStateChange = (nextAppState: AppStateStatus) => {
      const prevState = appStateRef.current;
      appStateRef.current = nextAppState;

      if (prevState.match(/inactive|background/) && nextAppState === 'active') {
        logInfo('App returned to foreground');
      } else if (prevState === 'active' && nextAppState.match(/inactive|background/)) {
        logInfo('App went to background - call continues in background');
      }
    };

    const subscription = AppState.addEventListener('change', handleAppStateChange);
    return () => subscription.remove();
  }, [logInfo]);

  // ===================
  // CLEANUP ON UNMOUNT
  // ===================

  useEffect(() => {
    isUnmountingRef.current = false;

    return () => {
      isUnmountingRef.current = true;

      // Cleanup
      const cleanup = async () => {
        // Unregister from CallManager
        if (callIdRef.current) {
          callManager.unregisterCall(callIdRef.current);
          callIdRef.current = null;
        }

        if (roomRef.current) {
          try {
            await roomRef.current.disconnect();
          } catch (e) {
            // Ignore
          }
          roomRef.current = null;
        }
        await stopAudioSession();
      };

      cleanup();
    };
  }, []);

  // ===================
  // AUTO CONNECT
  // ===================

  useEffect(() => {
    if (autoConnect && state === 'idle') {
      connect();
    }
  }, [autoConnect, state, connect]);

  // ===================
  // RETURN
  // ===================

  return {
    // Connection state
    state,
    error,

    // Call info
    roomName,
    callDuration,

    // Audio state
    isMuted,
    isAgentSpeaking,
    canPlayAudio,

    // Debug
    logs,
    participantCount,

    // Actions
    connect,
    disconnect,
    toggleMute,
    clearLogs,
  };
}

export { VOICE_NAME };
295
hooks/useSpeechRecognition.ts
Normal file
@ -0,0 +1,295 @@
/**
 * Speech Recognition Hook
 *
 * Wraps @jamsch/expo-speech-recognition for easy use in components.
 * Provides start/stop controls, recognized text, and status states.
 *
 * Usage:
 * ```typescript
 * const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
 *
 * // Start listening (will request permissions if needed)
 * await startListening();
 *
 * // Stop and get final result
 * stopListening();
 *
 * // recognizedText contains the transcript
 * ```
 */

import { useState, useCallback, useRef, useEffect } from 'react';
import {
  ExpoSpeechRecognitionModule,
  useSpeechRecognitionEvent,
} from '@jamsch/expo-speech-recognition';
import { Platform } from 'react-native';

export interface UseSpeechRecognitionOptions {
  /** Language for recognition (default: 'en-US') */
  lang?: string;
  /** Whether to return interim results while speaking (default: true) */
  interimResults?: boolean;
  /** Whether to continue listening after pause (default: false) */
  continuous?: boolean;
  /** Whether to add punctuation (iOS only, default: true) */
  addsPunctuation?: boolean;
  /** Callback when speech recognition result is available */
  onResult?: (transcript: string, isFinal: boolean) => void;
  /** Callback when an error occurs */
  onError?: (error: string) => void;
  /** Callback when speech recognition starts */
  onStart?: () => void;
  /** Callback when speech recognition ends */
  onEnd?: () => void;
  /** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
  onVoiceDetected?: () => void;
}

export interface UseSpeechRecognitionReturn {
  /** Start listening for speech */
  startListening: () => Promise<boolean>;
  /** Stop listening and finalize result */
  stopListening: () => void;
  /** Abort listening without processing */
  abortListening: () => void;
  /** Whether currently listening */
  isListening: boolean;
  /** Whether speech recognition is available on this device */
  isAvailable: boolean;
  /** Current recognized text (updates in real-time if interimResults=true) */
  recognizedText: string;
  /** Partial transcript (interim result, not final) */
  partialTranscript: string;
  /** Error message if any */
  error: string | null;
  /** Clear the recognized text and error */
  reset: () => void;
}

export function useSpeechRecognition(
  options: UseSpeechRecognitionOptions = {}
): UseSpeechRecognitionReturn {
  const {
    lang = 'en-US',
    interimResults = true,
    continuous = false,
    addsPunctuation = true,
    onResult,
    onError,
    onStart,
    onEnd,
    onVoiceDetected,
  } = options;

  const [isListening, setIsListening] = useState(false);
  const [isAvailable, setIsAvailable] = useState(true);
  const [recognizedText, setRecognizedText] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [error, setError] = useState<string | null>(null);

  // Track if we're in the middle of starting to prevent double-starts
  const isStartingRef = useRef(false);
  // Track if voice has been detected in current session (for onVoiceDetected callback)
  const voiceDetectedRef = useRef(false);

  // Check availability on mount
  useEffect(() => {
    const checkAvailability = async () => {
      try {
        // Check if we can get permissions (indirect availability check)
        const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
        // If we can query permissions, the module is available
        setIsAvailable(true);
        console.log('[SpeechRecognition] Available, permission status:', status.status);
      } catch (err) {
        console.error('[SpeechRecognition] Not available:', err);
        setIsAvailable(false);
      }
    };
    checkAvailability();
  }, []);

  // Event: Recognition started
  useSpeechRecognitionEvent('start', () => {
    console.log('[SpeechRecognition] Started');
    setIsListening(true);
    setError(null);
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset voice detection flag for new session
    onStart?.();
  });

  // Event: Recognition ended
  useSpeechRecognitionEvent('end', () => {
    console.log('[SpeechRecognition] Ended');
    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
    voiceDetectedRef.current = false; // Reset for next session
    onEnd?.();
  });

  // Event: Result available
  useSpeechRecognitionEvent('result', (event) => {
    const results = event.results;
    if (results && results.length > 0) {
      const result = results[results.length - 1];
      const transcript = result?.transcript || '';
      const isFinal = event.isFinal ?? false;

      console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);

      // Trigger onVoiceDetected on first result (voice activity detected)
      if (!voiceDetectedRef.current && transcript.length > 0) {
        voiceDetectedRef.current = true;
        console.log('[SpeechRecognition] Voice activity detected');
        onVoiceDetected?.();
      }

      if (isFinal) {
        setRecognizedText(transcript);
        setPartialTranscript('');
      } else {
        setPartialTranscript(transcript);
      }

      onResult?.(transcript, isFinal);
    }
  });

  // Event: Error occurred
  useSpeechRecognitionEvent('error', (event) => {
    const errorMessage = event.message || event.error || 'Speech recognition error';
    console.error('[SpeechRecognition] Error:', errorMessage);

    // Don't set error for "no-speech" - this is normal when user doesn't say anything
    if (event.error !== 'no-speech') {
      setError(errorMessage);
      onError?.(errorMessage);
    }

    setIsListening(false);
    isStartingRef.current = false;
  });

  /**
   * Start listening for speech
   * @returns true if started successfully, false otherwise
   */
  const startListening = useCallback(async (): Promise<boolean> => {
    if (isListening || isStartingRef.current) {
      console.log('[SpeechRecognition] Already listening or starting');
      return false;
    }

    if (!isAvailable) {
      const msg = 'Speech recognition is not available on this device';
      console.error('[SpeechRecognition]', msg);
      setError(msg);
      onError?.(msg);
      return false;
    }

    isStartingRef.current = true;
    setError(null);
    setRecognizedText('');
    setPartialTranscript('');

    try {
      // Request permissions
      const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();

      if (!permissionResult.granted) {
        const msg = 'Microphone permission denied';
        console.error('[SpeechRecognition]', msg);
        setError(msg);
        onError?.(msg);
        isStartingRef.current = false;
        return false;
      }

      console.log('[SpeechRecognition] Starting with lang:', lang);

      // Start recognition
      ExpoSpeechRecognitionModule.start({
        lang,
        interimResults,
        continuous,
        addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
        // Android-specific: longer silence timeout for more natural pauses
        androidIntentOptions: Platform.OS === 'android' ? {
          EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
          EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
        } : undefined,
      });

      return true;
    } catch (err) {
      const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
      console.error('[SpeechRecognition] Start error:', msg);
      setError(msg);
      onError?.(msg);
      isStartingRef.current = false;
      return false;
    }
  }, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);

  /**
   * Stop listening and process final result
   */
  const stopListening = useCallback(() => {
    if (!isListening && !isStartingRef.current) {
      console.log('[SpeechRecognition] Not listening, nothing to stop');
      return;
    }

    console.log('[SpeechRecognition] Stopping...');
    try {
      ExpoSpeechRecognitionModule.stop();
    } catch (err) {
      console.warn('[SpeechRecognition] Stop error:', err);
    }
  }, [isListening]);

  /**
   * Abort listening without processing
   */
  const abortListening = useCallback(() => {
    if (!isListening && !isStartingRef.current) {
      return;
    }

    console.log('[SpeechRecognition] Aborting...');
    try {
      ExpoSpeechRecognitionModule.abort();
    } catch (err) {
      console.warn('[SpeechRecognition] Abort error:', err);
    }

    setIsListening(false);
    setPartialTranscript('');
    isStartingRef.current = false;
  }, [isListening]);

  /**
   * Reset state
   */
  const reset = useCallback(() => {
    setRecognizedText('');
    setPartialTranscript('');
    setError(null);
  }, []);

  return {
    startListening,
    stopListening,
    abortListening,
    isListening,
    isAvailable,
    recognizedText,
    partialTranscript,
    error,
    reset,
  };
}
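
Reviewer note: a minimal sketch of wiring this hook to the voice context for barge-in, i.e. cutting off TTS the moment the user starts talking. The `useBargeInVoiceChat` wrapper is an illustrative assumption, not code from this commit.

```typescript
import { useCallback } from 'react';
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
import { useVoice } from '@/contexts/VoiceContext';

// Illustrative only: listen with interim results, interrupt TTS on voice activity,
// and hand the final transcript to the WellNuo API via the voice context.
export function useBargeInVoiceChat() {
  const { interruptIfSpeaking, sendTranscript } = useVoice();

  const handleResult = useCallback(
    (transcript: string, isFinal: boolean) => {
      if (isFinal && transcript.trim()) {
        void sendTranscript(transcript);
      }
    },
    [sendTranscript]
  );

  return useSpeechRecognition({
    interimResults: true,
    onVoiceDetected: interruptIfSpeaking, // stop TTS as soon as speech is detected
    onResult: handleResult,
  });
}
```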
252
hooks/useTextToSpeech.ts
Normal file
252
hooks/useTextToSpeech.ts
Normal file
@ -0,0 +1,252 @@
|
||||
/**
|
||||
* Text-to-Speech Hook
|
||||
*
|
||||
* Wraps expo-speech for easy use in components.
|
||||
* Provides speak/stop controls, status states, and queue management.
|
||||
*
|
||||
* Usage:
|
||||
* ```typescript
|
||||
* const { speak, stop, isSpeaking, error } = useTextToSpeech();
|
||||
*
|
||||
* // Speak text
|
||||
* await speak('Hello world');
|
||||
*
|
||||
* // Stop speaking
|
||||
* stop();
|
||||
*
|
||||
* // Check if speaking
|
||||
* if (isSpeaking) { ... }
|
||||
* ```
|
||||
*/
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import * as Speech from 'expo-speech';
|
||||
|
||||
export interface UseTextToSpeechOptions {
|
||||
/** Language for speech (default: 'en-US') */
|
||||
language?: string;
|
||||
/** Speech rate, 0.5-2.0 (default: 0.9) */
|
||||
rate?: number;
|
||||
/** Speech pitch, 0.5-2.0 (default: 1.0) */
|
||||
pitch?: number;
|
||||
/** Voice identifier (optional, uses system default) */
|
||||
voice?: string;
|
||||
/** Callback when speech starts */
|
||||
onStart?: () => void;
|
||||
/** Callback when speech ends */
|
||||
onDone?: () => void;
|
||||
/** Callback when speech is stopped */
|
||||
onStopped?: () => void;
|
||||
/** Callback when an error occurs */
|
||||
onError?: (error: string) => void;
|
||||
}
|
||||
|
||||
export interface UseTextToSpeechReturn {
|
||||
/** Speak text using TTS */
|
||||
speak: (text: string, options?: Partial<UseTextToSpeechOptions>) => Promise<void>;
|
||||
/** Stop speaking */
|
||||
stop: () => void;
|
||||
/** Whether currently speaking */
|
||||
isSpeaking: boolean;
|
||||
/** Whether TTS is available on this device */
|
||||
isAvailable: boolean;
|
||||
/** Current text being spoken */
|
||||
currentText: string | null;
|
||||
/** Error message if any */
|
||||
error: string | null;
|
||||
/** Get available voices */
|
||||
getVoices: () => Promise<Speech.Voice[]>;
|
||||
/** Clear error state */
|
||||
clearError: () => void;
|
||||
}
|
||||
|
||||
export function useTextToSpeech(
|
||||
options: UseTextToSpeechOptions = {}
|
||||
): UseTextToSpeechReturn {
|
||||
  const {
    language = 'en-US',
    rate = 0.9,
    pitch = 1.0,
    voice,
    onStart,
    onDone,
    onStopped,
    onError,
  } = options;

  const [isSpeaking, setIsSpeaking] = useState(false);
  const [isAvailable, setIsAvailable] = useState(true);
  const [currentText, setCurrentText] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  // Track if component is mounted to prevent state updates after unmount
  const isMountedRef = useRef(true);
  // Track current speech promise resolve
  const resolveRef = useRef<(() => void) | null>(null);

  // Check if currently speaking on mount and cleanup
  useEffect(() => {
    isMountedRef.current = true;

    const checkSpeaking = async () => {
      try {
        const speaking = await Speech.isSpeakingAsync();
        if (isMountedRef.current) {
          setIsSpeaking(speaking);
        }
      } catch (err) {
        console.warn('[TTS] Could not check speaking status:', err);
      }
    };
    checkSpeaking();

    return () => {
      isMountedRef.current = false;
      // Stop any ongoing speech when unmounting
      Speech.stop();
    };
  }, []);

  /**
   * Speak text using TTS
   * @param text - Text to speak
   * @param overrideOptions - Override default options for this call
   * @returns Promise that resolves when speech completes
   */
  const speak = useCallback(
    async (
      text: string,
      overrideOptions?: Partial<UseTextToSpeechOptions>
    ): Promise<void> => {
      const trimmedText = text.trim();
      if (!trimmedText) {
        console.log('[TTS] Empty text, skipping');
        return;
      }

      // Merge options
      const opts = {
        language: overrideOptions?.language ?? language,
        rate: overrideOptions?.rate ?? rate,
        pitch: overrideOptions?.pitch ?? pitch,
        voice: overrideOptions?.voice ?? voice,
        onStart: overrideOptions?.onStart ?? onStart,
        onDone: overrideOptions?.onDone ?? onDone,
        onStopped: overrideOptions?.onStopped ?? onStopped,
        onError: overrideOptions?.onError ?? onError,
      };

      // Stop any current speech before starting new
      if (isSpeaking) {
        Speech.stop();
        // Wait a bit for cleanup
        await new Promise((r) => setTimeout(r, 50));
      }

      console.log('[TTS] Speaking:', trimmedText.slice(0, 50) + (trimmedText.length > 50 ? '...' : ''));

      if (isMountedRef.current) {
        setCurrentText(trimmedText);
        setIsSpeaking(true);
        setError(null);
      }

      return new Promise<void>((resolve) => {
        resolveRef.current = resolve;

        Speech.speak(trimmedText, {
          language: opts.language,
          rate: opts.rate,
          pitch: opts.pitch,
          voice: opts.voice,
          onStart: () => {
            console.log('[TTS] Started');
            opts.onStart?.();
          },
          onDone: () => {
            console.log('[TTS] Completed');
            if (isMountedRef.current) {
              setIsSpeaking(false);
              setCurrentText(null);
            }
            opts.onDone?.();
            resolveRef.current = null;
            resolve();
          },
          onStopped: () => {
            console.log('[TTS] Stopped');
            if (isMountedRef.current) {
              setIsSpeaking(false);
              setCurrentText(null);
            }
            opts.onStopped?.();
            resolveRef.current = null;
            resolve();
          },
          onError: (err) => {
            const errorMsg = typeof err === 'string' ? err : 'Speech synthesis error';
            console.error('[TTS] Error:', errorMsg);
            if (isMountedRef.current) {
              setIsSpeaking(false);
              setCurrentText(null);
              setError(errorMsg);
            }
            opts.onError?.(errorMsg);
            resolveRef.current = null;
            resolve();
          },
        });
      });
    },
    [language, rate, pitch, voice, isSpeaking, onStart, onDone, onStopped, onError]
  );

  /**
   * Stop speaking
   */
  const stop = useCallback(() => {
    console.log('[TTS] Stop requested');
    Speech.stop();
    if (isMountedRef.current) {
      setIsSpeaking(false);
      setCurrentText(null);
    }
    // Resolve pending promise
    if (resolveRef.current) {
      resolveRef.current();
      resolveRef.current = null;
    }
  }, []);

  /**
   * Get available voices for speech synthesis
   */
  const getVoices = useCallback(async (): Promise<Speech.Voice[]> => {
    try {
      const voices = await Speech.getAvailableVoicesAsync();
      console.log('[TTS] Available voices:', voices.length);
      return voices;
    } catch (err) {
      console.error('[TTS] Could not get voices:', err);
      return [];
    }
  }, []);

  /**
   * Clear error state
   */
  const clearError = useCallback(() => {
    setError(null);
  }, []);

  return {
    speak,
    stop,
    isSpeaking,
    isAvailable,
    currentText,
    error,
    getVoices,
    clearError,
  };
}
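For reference, a minimal usage sketch of the hook above from a screen component; the component name, import path, and spoken text are illustrative and not part of this diff:

```typescript
// Hypothetical consumer of the hook shown above.
import React from 'react';
import { Button, View } from 'react-native';
import { useTextToSpeech } from '@/hooks/useTextToSpeech'; // assumed export path

export function SpeakDemo() {
  const { speak, stop, isSpeaking } = useTextToSpeech({ language: 'en-US', rate: 0.9 });

  return (
    <View>
      <Button
        title={isSpeaking ? 'Stop' : 'Speak'}
        onPress={() => (isSpeaking ? stop() : speak('Hello from Julia AI'))}
      />
    </View>
  );
}
```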
48
package-lock.json
generated
48
package-lock.json
generated
@ -8,11 +8,9 @@
"name": "wellnuo",
"version": "1.0.0",
"dependencies": {
"@config-plugins/react-native-webrtc": "^13.0.0",
"@dr.pogodin/react-native-fs": "^2.36.2",
"@expo/vector-icons": "^15.0.3",
"@livekit/react-native": "^2.9.6",
"@livekit/react-native-expo-plugin": "^1.0.1",
"@jamsch/expo-speech-recognition": "^0.2.15",
"@notifee/react-native": "^9.1.8",
"@react-navigation/bottom-tabs": "^7.4.0",
"@react-navigation/elements": "^2.6.3",
@ -29,12 +27,12 @@
"expo-linking": "~8.0.10",
"expo-router": "~6.0.19",
"expo-secure-store": "^15.0.8",
"expo-speech": "~14.0.6",
"expo-splash-screen": "~31.0.12",
"expo-status-bar": "~3.0.9",
"expo-symbols": "~1.0.8",
"expo-system-ui": "~6.0.9",
"expo-web-browser": "~15.0.10",
"livekit-client": "^2.17.0",
"react": "19.1.0",
"react-dom": "19.1.0",
"react-native": "0.81.5",
@ -1554,15 +1552,6 @@
"integrity": "sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
},
"node_modules/@config-plugins/react-native-webrtc": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/@config-plugins/react-native-webrtc/-/react-native-webrtc-13.0.0.tgz",
"integrity": "sha512-EtRRLXmsU4GcDA3TgIxtqg++eh/CjbI6EV8N/1EFQTtaWI2lpww0fg+S0wd+ndXE0dFWaLqUFvZuyTAaAoOSeA==",
"license": "MIT",
"peerDependencies": {
"expo": "^54"
}
},
"node_modules/@dr.pogodin/react-native-fs": {
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/@dr.pogodin/react-native-fs/-/react-native-fs-2.36.2.tgz",
@ -3013,6 +3002,18 @@
"node": ">=8"
}
},
"node_modules/@jamsch/expo-speech-recognition": {
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/@jamsch/expo-speech-recognition/-/expo-speech-recognition-0.2.15.tgz",
"integrity": "sha512-VzhR6a1bYnh8Yl704sBbvCmPqkZWzEggzl504myy6GKqQ90Ib+FQsz9FKI8RQbBXf8KHfhJVT3t0AxP6lYyyYw==",
"deprecated": "Package has moved to expo-speech-recognition",
"license": "MIT",
"peerDependencies": {
"expo": "*",
"react": "*",
"react-native": "*"
}
},
"node_modules/@jest/create-cache-key-function": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/@jest/create-cache-key-function/-/create-cache-key-function-29.7.0.tgz",
@ -3245,18 +3246,6 @@
"react-native": "*"
}
},
"node_modules/@livekit/react-native-expo-plugin": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@livekit/react-native-expo-plugin/-/react-native-expo-plugin-1.0.1.tgz",
"integrity": "sha512-CSPjjzgDDlBH1ZyFyaw7/FW2Ql1S51eUkIxv/vjGwVshn+lUD6eQ9VgfUh7ha84itvjXi9X87FvP0XWKn9CiFQ==",
"license": "Apache-2.0",
"peerDependencies": {
"@livekit/react-native": "^2.1.0",
"expo": "*",
"react": "*",
"react-native": "*"
}
},
"node_modules/@livekit/react-native-webrtc": {
"version": "137.0.2",
"resolved": "https://registry.npmjs.org/@livekit/react-native-webrtc/-/react-native-webrtc-137.0.2.tgz",
@ -7385,6 +7374,15 @@
"node": ">=20.16.0"
}
},
"node_modules/expo-speech": {
"version": "14.0.8",
"resolved": "https://registry.npmjs.org/expo-speech/-/expo-speech-14.0.8.tgz",
"integrity": "sha512-UjBFCFv58nutlLw92L7kUS0ZjbOOfaTdiEv/HbjvMrT6BfldoOLLBZbaEcEhDdZK36NY/kass0Kzxk+co6vxSQ==",
"license": "MIT",
"peerDependencies": {
"expo": "*"
}
},
"node_modules/expo-splash-screen": {
"version": "31.0.12",
"resolved": "https://registry.npmjs.org/expo-splash-screen/-/expo-splash-screen-31.0.12.tgz",

@ -11,11 +11,9 @@
"lint": "expo lint"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^13.0.0",
"@dr.pogodin/react-native-fs": "^2.36.2",
"@expo/vector-icons": "^15.0.3",
"@livekit/react-native": "^2.9.6",
"@livekit/react-native-expo-plugin": "^1.0.1",
"@jamsch/expo-speech-recognition": "^0.2.15",
"@notifee/react-native": "^9.1.8",
"@react-navigation/bottom-tabs": "^7.4.0",
"@react-navigation/elements": "^2.6.3",
@ -32,12 +30,12 @@
"expo-linking": "~8.0.10",
"expo-router": "~6.0.19",
"expo-secure-store": "^15.0.8",
"expo-speech": "~14.0.6",
"expo-splash-screen": "~31.0.12",
"expo-status-bar": "~3.0.9",
"expo-symbols": "~1.0.8",
"expo-system-ui": "~6.0.9",
"expo-web-browser": "~15.0.10",
"livekit-client": "^2.17.0",
"react": "19.1.0",
"react-dom": "19.1.0",
"react-native": "0.81.5",

@ -3,8 +3,6 @@
*
* Ensures only ONE voice call can be active at a time per device.
* If a new call is started while another is active, the old one is disconnected first.
*
* This addresses the LiveKit concurrent agent jobs limit (5 per project).
*/

type DisconnectCallback = () => Promise<void>;
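The rest of the manager is outside this hunk; a rough sketch of the pattern the comment describes, with every name other than DisconnectCallback assumed rather than taken from the diff:

```typescript
// Hypothetical sketch of the single-active-call pattern described above.
type DisconnectCallback = () => Promise<void>;

let activeDisconnect: DisconnectCallback | null = null;

/** Register a new call, tearing down any previously active one first. */
export async function registerActiveCall(disconnect: DisconnectCallback): Promise<void> {
  if (activeDisconnect) {
    await activeDisconnect(); // old call is disconnected before the new one starts
  }
  activeDisconnect = disconnect;
}

/** Clear the registration when the active call ends normally. */
export function clearActiveCall(): void {
  activeDisconnect = null;
}
```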
@ -1,146 +0,0 @@
/**
 * LiveKit Voice AI Service
 * Connects to LiveKit Cloud with Julia AI agent
 * Uses dedicated Julia Token Server for token generation
 */

// Julia Token Server (dedicated endpoint for LiveKit tokens)
const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';

// Voice configuration
export const VOICE_ID = 'Asteria';
export const VOICE_NAME = 'Asteria';

// ============================================================================
// SINGLE_DEPLOYMENT_MODE
// When true: sends only deploymentId (no beneficiaryNamesDict)
// When false: sends both deploymentId AND beneficiaryNamesDict
//
// Use true for WellNuo Lite (single beneficiary per user)
// Use false for full WellNuo app (multiple beneficiaries)
// ============================================================================
export const SINGLE_DEPLOYMENT_MODE = true;

// Beneficiary data to pass to voice agent
export interface BeneficiaryData {
  deploymentId: string;
  beneficiaryNamesDict: Record<string, string>;
}

// API Response types
export interface LiveKitTokenResponse {
  success: boolean;
  data?: {
    token: string;
    roomName: string;
    wsUrl: string;
  };
  error?: string;
}

/**
 * Get a LiveKit access token from Julia Token Server
 * No authentication required - token server is dedicated for voice AI
 * @param userId - User identifier
 * @param beneficiaryData - Optional beneficiary data to pass to voice agent
 */
export async function getToken(
  userId: string,
  beneficiaryData?: BeneficiaryData
): Promise<LiveKitTokenResponse> {
  try {
    console.log('[LiveKit] Getting token for user:', userId);
    console.log('[LiveKit] SINGLE_DEPLOYMENT_MODE:', SINGLE_DEPLOYMENT_MODE);

    // Prepare request body based on SINGLE_DEPLOYMENT_MODE
    let requestBody: { userId: string; beneficiaryData?: BeneficiaryData };

    if (SINGLE_DEPLOYMENT_MODE && beneficiaryData) {
      // In single deployment mode: send only deploymentId, no beneficiaryNamesDict
      requestBody = {
        userId,
        beneficiaryData: {
          deploymentId: beneficiaryData.deploymentId,
          beneficiaryNamesDict: {}, // Empty - no list of names
        },
      };
      console.log('[LiveKit] Single deployment mode - sending only deploymentId:', beneficiaryData.deploymentId);
    } else {
      // Full mode: send everything
      requestBody = { userId, beneficiaryData };
      if (beneficiaryData) {
        console.log('[LiveKit] Full mode - sending beneficiary data:', beneficiaryData);
      }
    }

    // Request LiveKit token from Julia Token Server
    const response = await fetch(`${JULIA_TOKEN_SERVER}/token`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      const errorData = await response.json().catch(() => ({}));
      console.error('[LiveKit] Token request failed:', response.status, errorData);
      return {
        success: false,
        error: errorData.error || `Failed to get token: ${response.status}`,
      };
    }

    const data = await response.json();

    if (!data.success) {
      return {
        success: false,
        error: data.error || 'Token generation failed',
      };
    }

    console.log('[LiveKit] Token received:', {
      room: data.data.roomName,
      identity: data.data.identity,
      url: data.data.wsUrl,
    });

    return {
      success: true,
      data: {
        token: data.data.token,
        roomName: data.data.roomName,
        wsUrl: data.data.wsUrl,
      },
    };
  } catch (error) {
    console.error('[LiveKit] Get token error:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Failed to get token',
    };
  }
}

/**
 * Check if LiveKit service is available
 */
export async function checkServerHealth(): Promise<boolean> {
  try {
    const response = await fetch(`${JULIA_TOKEN_SERVER}/health`, {
      method: 'GET',
    });

    if (response.ok) {
      const data = await response.json();
      console.log('[LiveKit] Health check:', data);
      return data.status === 'ok';
    }

    return false;
  } catch (error) {
    console.error('[LiveKit] Health check failed:', error);
    return false;
  }
}
@ -1,336 +0,0 @@
# FEATURE-002: LiveKit Voice Call with Julia AI

## Summary

A full-fledged voice call with Julia AI through LiveKit Cloud. The user taps the "Start Voice Call" button, a phone-style call screen opens, and they can talk to Julia AI by voice.

## Status: 🔴 Not Started (full rework required)

## Priority: Critical

## Problem Statement

The current implementation has the following problems:
1. **STT (Speech-to-Text) is unreliable**: the microphone is sometimes detected and sometimes not
2. **TTS works**: Julia's voice is audible
3. **The code is complex and tangled**: lots of legacy code, polyfills, and hacks
4. **No clear architecture**: everything lives in a single file, voice-call.tsx

## Root Cause Analysis

### Why the microphone is unreliable:
1. **iOS AudioSession**: incorrect configuration, or a race condition during setup
2. **registerGlobals()**: the WebRTC polyfills may not finish initializing in time
3. **Permissions**: microphone access may not be granted, or the mic may be held by another process
4. **Event handling**: LiveKit events can get lost

### What works:
- LiveKit Cloud connection ✅
- Token generation ✅
- TTS (Deepgram Asteria) ✅
- Backend agent (Julia AI) ✅

---

## Architecture

### System Overview

```
┌─────────────────────────────────────────────────────────────────────┐
│ WellNuo Lite App (iOS) │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Voice Tab │───▶│ VoiceCallScreen │───▶│ LiveKit Room │ │
│ │ (entry) │ │ (fullscreen) │ │ (WebRTC) │ │
│ └──────────────┘ └──────────────────┘ └──────────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │useLiveKitRoom│ │ AudioSession │ │
│ │ (hook) │ │ (iOS native) │ │
│ └──────────────┘ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
│
│ WebSocket + WebRTC
▼
┌─────────────────────────────────────────────────────────────────────┐
│ LiveKit Cloud │
├─────────────────────────────────────────────────────────────────────┤
│ Room: wellnuo-{userId}-{timestamp} │
│ Participants: user + julia-agent │
│ Audio Tracks: bidirectional │
└─────────────────────────────────────────────────────────────────────┘
│
│ Agent dispatch
▼
┌─────────────────────────────────────────────────────────────────────┐
│ Julia AI Agent (Python) │
├─────────────────────────────────────────────────────────────────────┤
│ STT: Deepgram Nova-2 │
│ LLM: WellNuo voice_ask API │
│ TTS: Deepgram Aura Asteria │
│ Framework: LiveKit Agents SDK 1.3.11 │
└─────────────────────────────────────────────────────────────────────┘
```

### Data Flow

```
User speaks → iOS Mic → WebRTC → LiveKit Cloud → Agent → Deepgram STT
│
▼
WellNuo API (LLM)
│
▼
Agent receives text ← LiveKit Cloud ← WebRTC ← Deepgram TTS (audio)
│
▼
iOS Speaker → User hears Julia
```

---

## Technical Requirements

### Dependencies (package.json)

```json
{
  "@livekit/react-native": "^2.x",
  "livekit-client": "^2.x",
  "expo-keep-awake": "^14.x"
}
```

### iOS Permissions (app.json)

```json
{
  "ios": {
    "infoPlist": {
      "NSMicrophoneUsageDescription": "WellNuo needs microphone access for voice calls with Julia AI",
      "UIBackgroundModes": ["audio", "voip"]
    }
  }
}
```

### Token Server (already exists)

- **URL**: `https://wellnuo.smartlaunchhub.com/julia/token`
- **Method**: POST
- **Body**: `{ "userId": "string" }`
- **Response**: `{ "success": true, "data": { "token", "roomName", "wsUrl" } }`
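As a quick illustration of this contract, a minimal client-side sketch; the response handling mirrors the `getToken` helper from the deleted `services/livekitService.ts` shown above:

```typescript
// Assumes the token server accepts { userId } and answers with
// { success, data: { token, roomName, wsUrl } } as documented above.
async function fetchJuliaToken(userId: string) {
  const res = await fetch('https://wellnuo.smartlaunchhub.com/julia/token', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ userId }),
  });
  if (!res.ok) throw new Error(`Token request failed: ${res.status}`);

  const json = await res.json();
  if (!json.success) throw new Error(json.error ?? 'Token generation failed');

  // token + wsUrl are what room.connect() needs; roomName is informational.
  return json.data as { token: string; roomName: string; wsUrl: string };
}
```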
---

## Implementation Steps

### Phase 1: Cleanup (DELETE old code)

- [ ] 1.1. Delete `app/voice-call.tsx` (current broken implementation)
- [ ] 1.2. Keep `app/(tabs)/voice.tsx` (entry point) but simplify
- [ ] 1.3. Keep `services/livekitService.ts` (token fetching)
- [ ] 1.4. Keep `contexts/VoiceTranscriptContext.tsx` (transcript storage)
- [ ] 1.5. Delete `components/VoiceIndicator.tsx` (unused)
- [ ] 1.6. Delete `polyfills/livekit-globals.ts` (not needed with proper setup)

### Phase 2: New Architecture

- [ ] 2.1. Create `hooks/useLiveKitRoom.ts` — encapsulate all LiveKit logic
- [ ] 2.2. Create `app/voice-call.tsx` — simple UI component using the hook
- [ ] 2.3. Create `utils/audioSession.ts` — iOS AudioSession helper

### Phase 3: useLiveKitRoom Hook

**File**: `hooks/useLiveKitRoom.ts`

```typescript
interface UseLiveKitRoomOptions {
  userId: string;
  onTranscript?: (role: 'user' | 'assistant', text: string) => void;
}

interface UseLiveKitRoomReturn {
  // Connection state
  state: 'idle' | 'connecting' | 'connected' | 'reconnecting' | 'disconnected' | 'error';
  error: string | null;

  // Call info
  roomName: string | null;
  callDuration: number; // seconds

  // Audio state
  isMuted: boolean;
  isSpeaking: boolean; // agent is speaking

  // Actions
  connect: () => Promise<void>;
  disconnect: () => Promise<void>;
  toggleMute: () => void;
}
```

**Implementation requirements**:
1. MUST call `registerGlobals()` BEFORE importing `livekit-client` (see the sketch after this list)
2. MUST configure iOS AudioSession BEFORE connecting to room
3. MUST handle all RoomEvents properly
4. MUST cleanup on unmount (disconnect, stop audio session)
5. MUST handle background/foreground transitions
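Requirement 1 in practice, as a minimal ordering sketch. Placing this in the app entry module is an assumption; `registerGlobals` comes from `@livekit/react-native`, and `livekit-client` is loaded lazily so its module body runs only afterwards:

```typescript
import { registerGlobals } from '@livekit/react-native';

// Install the WebRTC globals (RTCPeerConnection, etc.) before any module that
// evaluates livekit-client runs.
registerGlobals();

// Load livekit-client lazily so its module body executes after the call above.
export async function createRoom() {
  const { Room } = await import('livekit-client');
  return new Room();
}
```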
### Phase 4: iOS AudioSession Configuration

**Critical for microphone to work!**

```typescript
// utils/audioSession.ts
import { AudioSession } from '@livekit/react-native';
import { Platform } from 'react-native';

export async function configureAudioForVoiceCall(): Promise<void> {
  if (Platform.OS !== 'ios') return;

  // Step 1: Set Apple audio configuration
  await AudioSession.setAppleAudioConfiguration({
    audioCategory: 'playAndRecord',
    audioCategoryOptions: [
      'allowBluetooth',
      'allowBluetoothA2DP',
      'defaultToSpeaker',
      'mixWithOthers',
    ],
    audioMode: 'voiceChat',
  });

  // Step 2: Configure output
  await AudioSession.configureAudio({
    ios: {
      defaultOutput: 'speaker',
    },
  });

  // Step 3: Start session
  await AudioSession.startAudioSession();
}

export async function stopAudioSession(): Promise<void> {
  if (Platform.OS !== 'ios') return;
  await AudioSession.stopAudioSession();
}
```

### Phase 5: Voice Call Screen UI

**File**: `app/voice-call.tsx`

Simple, clean UI:
- Avatar with Julia "J" letter
- Call duration timer
- Status text (Connecting... / Connected / Julia is speaking...)
- Mute button
- End call button
- Debug logs toggle (for development)

**NO complex logic in this file** — all LiveKit logic in the hook! A stripped-down sketch of the screen follows.
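For orientation only, a sketch of such a screen against the `UseLiveKitRoomReturn` interface from Phase 3; layout, styling, and the userId source are placeholders:

```typescript
import React from 'react';
import { Pressable, Text, View } from 'react-native';
import { useLiveKitRoom } from '@/hooks/useLiveKitRoom'; // created in Phase 2

export default function VoiceCallScreen() {
  const { state, callDuration, isMuted, isSpeaking, connect, disconnect, toggleMute } =
    useLiveKitRoom({ userId: 'demo-user' }); // real userId comes from app auth state

  const status =
    state === 'connecting' ? 'Connecting...'
    : isSpeaking ? 'Julia is speaking...'
    : state === 'connected' ? 'Connected'
    : state;

  return (
    <View>
      <Text>J</Text>
      <Text>{status}</Text>
      <Text>{Math.floor(callDuration / 60)}:{String(callDuration % 60).padStart(2, '0')}</Text>
      <Pressable onPress={state === 'idle' ? connect : toggleMute}>
        <Text>{state === 'idle' ? 'Start Voice Call' : isMuted ? 'Unmute' : 'Mute'}</Text>
      </Pressable>
      <Pressable onPress={disconnect}>
        <Text>End call</Text>
      </Pressable>
    </View>
  );
}
```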
### Phase 6: Testing Checklist

- [ ] 6.1. Fresh app launch → Start call → Can hear Julia greeting
- [ ] 6.2. Speak → Julia responds → Conversation works
- [ ] 6.3. Mute → Unmute → Still works
- [ ] 6.4. End call → Clean disconnect
- [ ] 6.5. App to background → Audio continues
- [ ] 6.6. App to foreground → Still connected
- [ ] 6.7. Multiple calls in a row → No memory leaks
- [ ] 6.8. No microphone permission → Shows error

---

## Files to Create/Modify

| File | Action | Description |
|------|--------|-------------|
| `hooks/useLiveKitRoom.ts` | CREATE | Main LiveKit hook with all logic |
| `utils/audioSession.ts` | CREATE | iOS AudioSession helpers |
| `app/voice-call.tsx` | REPLACE | Simple UI using the hook |
| `app/(tabs)/voice.tsx` | SIMPLIFY | Just entry point, remove debug UI |
| `services/livekitService.ts` | KEEP | Token fetching (already works) |
| `contexts/VoiceTranscriptContext.tsx` | KEEP | Transcript storage |
| `components/VoiceIndicator.tsx` | DELETE | Not needed |
| `polyfills/livekit-globals.ts` | DELETE | Not needed |

---

## Key Principles

### 1. Separation of Concerns
- **Hook** handles ALL LiveKit/WebRTC logic
- **Screen** only renders UI based on hook state
- **Utils** for platform-specific code (AudioSession)

### 2. Proper Initialization Order
```
1. registerGlobals() — WebRTC polyfills
2. configureAudioForVoiceCall() — iOS audio
3. getToken() — fetch from server
4. room.connect() — connect to LiveKit
5. room.localParticipant.setMicrophoneEnabled(true) — enable mic
```

### 3. Proper Cleanup Order
```
1. room.disconnect() — leave room
2. stopAudioSession() — release iOS audio
3. Clear all refs and state
```
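Principles 2 and 3 taken together amount to roughly this connect/disconnect skeleton; error handling and retries are omitted, and `configureAudioForVoiceCall`, `stopAudioSession`, and `getToken` are the helpers named above:

```typescript
import { registerGlobals } from '@livekit/react-native';
import { configureAudioForVoiceCall, stopAudioSession } from '@/utils/audioSession';
import { getToken } from '@/services/livekitService';

registerGlobals(); // 1. WebRTC polyfills, before livekit-client is evaluated

export async function startCall(userId: string) {
  const { Room } = await import('livekit-client');

  await configureAudioForVoiceCall();                      // 2. iOS audio session
  const res = await getToken(userId);                      // 3. token from Julia Token Server
  if (!res.success || !res.data) throw new Error(res.error ?? 'No token');

  const room = new Room();
  await room.connect(res.data.wsUrl, res.data.token);      // 4. join the room
  await room.localParticipant.setMicrophoneEnabled(true);  // 5. publish the mic
  return room;
}

export async function endCall(room: import('livekit-client').Room) {
  await room.disconnect();   // 1. leave the room
  await stopAudioSession();  // 2. release iOS audio
  // 3. the caller clears any refs/state it holds
}
```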
### 4. Error Handling
- Every async operation wrapped in try/catch
- User-friendly error messages
- Automatic retry for network issues
- Graceful degradation

---

## Success Criteria

1. ✅ User can start voice call and hear Julia greeting
2. ✅ User can speak and Julia understands (STT works reliably)
3. ✅ Julia responds with voice (TTS works)
4. ✅ Conversation can continue back and forth
5. ✅ Mute/unmute works
6. ✅ End call cleanly disconnects
7. ✅ No console errors or warnings
8. ✅ Works on iOS device (not just simulator)

---

## Related Links

- [LiveKit React Native SDK](https://docs.livekit.io/client-sdk-js/react-native/)
- [LiveKit Agents Python](https://docs.livekit.io/agents/)
- [Deepgram STT/TTS](https://deepgram.com/)
- [iOS AVAudioSession](https://developer.apple.com/documentation/avfaudio/avaudiosession)

---

## Notes

### Why previous approach failed:

1. **Too much code in one file** — voice-call.tsx had 900+ lines with all logic mixed
2. **Polyfills applied wrong** — Event class polyfill was inside the component
3. **AudioSession configured too late** — sometimes after connect() already started
4. **No proper error boundaries** — errors silently failed
5. **Race conditions** — multiple async operations without proper sequencing

### What's different this time:

1. **Hook-based architecture** — single source of truth for state
2. **Proper initialization sequence** — documented and enforced
3. **Clean separation** — UI knows nothing about WebRTC
4. **Comprehensive logging** — every step logged for debugging
5. **Test-driven** — write tests before implementation
@ -1,373 +0,0 @@
/**
 * Audio Session Configuration Helpers (iOS + Android)
 *
 * CRITICAL: This must be configured BEFORE connecting to LiveKit room!
 * Without proper AudioSession setup, microphone won't work on iOS.
 * On Android, this controls speaker/earpiece routing.
 */

import { Platform } from 'react-native';

/**
 * Represents an available audio output device
 */
export interface AudioOutputDevice {
  id: string;
  name: string;
  type: 'speaker' | 'earpiece' | 'bluetooth' | 'headphones' | 'unknown';
}

// AudioSession module - use 'any' to avoid complex typing issues with @livekit/react-native
// The actual AudioSession from LiveKit has specific enum types that are hard to match statically
let audioSessionModule: any = null;

/**
 * Import AudioSession module lazily
 * This is needed because @livekit/react-native must be imported after registerGlobals()
 */
async function getAudioSession(): Promise<any | null> {
  if (!audioSessionModule) {
    const livekit = await import('@livekit/react-native');
    audioSessionModule = livekit.AudioSession;
  }

  return audioSessionModule;
}

/**
 * Configure AudioSession for bidirectional voice call (iOS + Android)
 *
 * MUST be called BEFORE connecting to LiveKit room!
 *
 * iOS Configuration:
 * - Category: playAndRecord (both speaker and mic)
 * - Mode: voiceChat (optimized for voice calls)
 * - Options: Bluetooth, speaker, mix with others
 *
 * Android Configuration:
 * - audioTypeOptions: communication (for voice calls)
 * - forceHandleAudioRouting: true (to control speaker/earpiece)
 */
export async function configureAudioForVoiceCall(): Promise<void> {
  console.log(`[AudioSession] Configuring for voice call on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    if (Platform.OS === 'ios') {
      // iOS-specific configuration - FORCE SPEAKER OUTPUT
      // Using videoChat mode + defaultSpeakerOutput option for guaranteed speaker
      console.log('[AudioSession] Configuring iOS for SPEAKER output...');

      try {
        // Primary config: videoChat mode with defaultSpeakerOutput
        await AudioSession.setAppleAudioConfiguration({
          audioCategory: 'playAndRecord',
          audioCategoryOptions: [
            'allowBluetooth',
            'mixWithOthers',
            'defaultToSpeaker', // KEY: Forces speaker as default output
          ],
          audioMode: 'videoChat', // videoChat mode uses speaker by default
        });
        console.log('[AudioSession] iOS videoChat + defaultToSpeaker configured!');
      } catch (err) {
        console.warn('[AudioSession] Primary iOS config failed, trying fallback:', err);
        // Fallback: just videoChat without defaultToSpeaker option
        await AudioSession.setAppleAudioConfiguration({
          audioCategory: 'playAndRecord',
          audioCategoryOptions: ['allowBluetooth', 'mixWithOthers'],
          audioMode: 'videoChat',
        });
      }

      console.log('[AudioSession] Starting iOS audio session...');
      await AudioSession.startAudioSession();

      // Additionally set default output to speaker (belt and suspenders)
      try {
        console.log('[AudioSession] Setting iOS default output to speaker...');
        await AudioSession.configureAudio({
          ios: {
            defaultOutput: 'speaker',
          },
        });
        console.log('[AudioSession] iOS speaker output set!');
      } catch (outputErr) {
        console.warn('[AudioSession] Could not set speaker output:', outputErr);
      }
    } else if (Platform.OS === 'android') {
      // Android-specific configuration - FORCE SPEAKER OUTPUT
      // CRITICAL: Use 'inCommunication' mode + 'music' stream for speaker
      // Many Android devices default to earpiece for voice calls
      console.log('[AudioSession] Configuring Android audio for SPEAKER...');

      await AudioSession.configureAudio({
        android: {
          // Use inCommunication mode but with music stream for speaker
          audioTypeOptions: {
            manageAudioFocus: true,
            // inCommunication gives us more control over audio routing
            audioMode: 'inCommunication',
            audioFocusMode: 'gain',
            // Use 'music' stream - goes to speaker by default!
            audioStreamType: 'music',
            audioAttributesUsageType: 'media',
            audioAttributesContentType: 'music',
          },
          // Force speaker as output
          preferredOutputList: ['speaker'],
          // Allow us to control audio routing
          forceHandleAudioRouting: true,
        },
      });

      console.log('[AudioSession] Starting Android audio session...');
      await AudioSession.startAudioSession();

      // After starting, explicitly set speaker output
      console.log('[AudioSession] Forcing speaker output...');
      try {
        await AudioSession.showAudioRoutePicker?.();
      } catch {
        // showAudioRoutePicker may not be available, that's ok
      }

      console.log('[AudioSession] Android speaker mode configured!');
    }

    console.log('[AudioSession] Configuration complete!');
  } catch (error) {
    console.error('[AudioSession] Configuration error:', error);
    throw error;
  }
}
/**
 * Stop AudioSession (iOS + Android)
 *
 * Should be called when disconnecting from voice call
 */
export async function stopAudioSession(): Promise<void> {
  if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
    return;
  }

  console.log(`[AudioSession] Stopping audio session on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      return;
    }

    await AudioSession.stopAudioSession();
    console.log('[AudioSession] Stopped');
  } catch (error) {
    console.error('[AudioSession] Error stopping:', error);
    // Don't throw - cleanup errors are not critical
  }
}

/**
 * Reconfigure audio session after remote track arrives (iOS + Android)
 *
 * Sometimes the OS needs a kick to properly route audio after remote participant joins
 */
export async function reconfigureAudioForPlayback(): Promise<void> {
  if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
    return;
  }

  console.log(`[AudioSession] Reconfiguring for playback (SPEAKER) on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      return;
    }

    if (Platform.OS === 'ios') {
      // Reconfigure iOS - force speaker output
      await AudioSession.setAppleAudioConfiguration({
        audioCategory: 'playAndRecord',
        audioCategoryOptions: [
          'allowBluetooth',
          'mixWithOthers',
          'defaultToSpeaker', // Force speaker
        ],
        audioMode: 'videoChat', // videoChat = speaker by default
      });

      // Also set default output to speaker
      await AudioSession.configureAudio({
        ios: {
          defaultOutput: 'speaker',
        },
      });
      console.log('[AudioSession] iOS reconfigured for speaker playback');
    } else if (Platform.OS === 'android') {
      // Reconfigure Android audio to ensure speaker output
      // Using inCommunication + music stream for reliable speaker routing
      await AudioSession.configureAudio({
        android: {
          audioTypeOptions: {
            manageAudioFocus: true,
            audioMode: 'inCommunication',
            audioFocusMode: 'gain',
            audioStreamType: 'music',
            audioAttributesUsageType: 'media',
            audioAttributesContentType: 'music',
          },
          preferredOutputList: ['speaker'],
          forceHandleAudioRouting: true,
        },
      });
      console.log('[AudioSession] Android reconfigured for speaker playback');
    }

    console.log('[AudioSession] Reconfigured successfully');
  } catch (error) {
    console.error('[AudioSession] Reconfigure error:', error);
    // Don't throw - this is a best-effort operation
  }
}

/**
 * Switch audio output between speaker and earpiece (iOS + Android)
 *
 * @param useSpeaker - true for speaker, false for earpiece
 */
/**
 * Get list of available audio output devices
 *
 * @returns Array of available audio output devices
 */
export async function getAvailableAudioOutputs(): Promise<AudioOutputDevice[]> {
  console.log(`[AudioSession] Getting available audio outputs on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return [];
    }

    const outputs = await AudioSession.getAudioOutputs();
    console.log('[AudioSession] Available outputs:', outputs);

    // Map the raw outputs to our AudioOutputDevice interface
    if (Array.isArray(outputs)) {
      return outputs.map((output: any) => ({
        id: output.id || output.deviceId || String(output),
        name: output.name || output.deviceName || String(output),
        type: mapDeviceType(output.type || output.deviceType),
      }));
    }

    return [];
  } catch (error) {
    console.error('[AudioSession] getAvailableAudioOutputs error:', error);
    return [];
  }
}

/**
 * Select a specific audio output device by ID
 *
 * @param deviceId - The ID of the device to select
 */
export async function selectAudioOutput(deviceId: string): Promise<void> {
  console.log(`[AudioSession] Selecting audio output: ${deviceId} on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    await AudioSession.selectAudioOutput(deviceId);
    console.log(`[AudioSession] Audio output selected: ${deviceId}`);
  } catch (error) {
    console.error('[AudioSession] selectAudioOutput error:', error);
  }
}

/**
 * Map raw device type to our AudioOutputDevice type
 */
function mapDeviceType(rawType: string | undefined): AudioOutputDevice['type'] {
  if (!rawType) return 'unknown';

  const type = rawType.toLowerCase();
  if (type.includes('speaker')) return 'speaker';
  if (type.includes('earpiece') || type.includes('receiver')) return 'earpiece';
  if (type.includes('bluetooth')) return 'bluetooth';
  if (type.includes('headphone') || type.includes('headset') || type.includes('wired')) return 'headphones';

  return 'unknown';
}

/**
 * Switch audio output between speaker and earpiece (iOS + Android)
 *
 * @param useSpeaker - true for speaker, false for earpiece
 */
export async function setAudioOutput(useSpeaker: boolean): Promise<void> {
  console.log(`[AudioSession] Setting audio output to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'} on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    if (Platform.OS === 'ios') {
      // iOS: Use videoChat mode + defaultToSpeaker for speaker, voiceChat for earpiece
      await AudioSession.setAppleAudioConfiguration({
        audioCategory: 'playAndRecord',
        audioCategoryOptions: useSpeaker
          ? ['allowBluetooth', 'mixWithOthers', 'defaultToSpeaker']
          : ['allowBluetooth', 'mixWithOthers'],
        audioMode: useSpeaker ? 'videoChat' : 'voiceChat',
      });

      // Also set default output
      await AudioSession.configureAudio({
        ios: {
          defaultOutput: useSpeaker ? 'speaker' : 'earpiece',
        },
      });
    } else if (Platform.OS === 'android') {
      // Android: Switch stream type to control speaker/earpiece
      // - 'music' stream goes to speaker by default
      // - 'voiceCall' stream goes to earpiece by default
      await AudioSession.configureAudio({
        android: {
          audioTypeOptions: {
            manageAudioFocus: true,
            audioMode: useSpeaker ? 'normal' : 'inCommunication',
            audioFocusMode: 'gain',
            // Key difference: music→speaker, voiceCall→earpiece
            audioStreamType: useSpeaker ? 'music' : 'voiceCall',
            audioAttributesUsageType: useSpeaker ? 'media' : 'voiceCommunication',
            audioAttributesContentType: useSpeaker ? 'music' : 'speech',
          },
          // Also set preferred output list
          preferredOutputList: useSpeaker ? ['speaker'] : ['earpiece'],
          forceHandleAudioRouting: true,
        },
      });
    }

    console.log(`[AudioSession] Audio output set to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'}`);
  } catch (error) {
    console.error('[AudioSession] setAudioOutput error:', error);
  }
}