Compare commits


No commits in common. "6d339acc642740c0bbd71455db9b77bf21d693ea" and "5b5cdf1098d2926fdf5822885ce163f205b27f7f" have entirely different histories.

17 changed files with 2615 additions and 1562 deletions

View File

@ -1,8 +1,8 @@
{
"expo": {
"name": "WellNuo Lite",
"name": "WellNuo",
"slug": "WellNuo",
"version": "1.0.6",
"version": "1.0.5",
"orientation": "portrait",
"icon": "./assets/images/icon.png",
"scheme": "wellnuo",
@ -55,13 +55,8 @@
"favicon": "./assets/images/favicon.png"
},
"plugins": [
[
"@jamsch/expo-speech-recognition",
{
"microphonePermission": "WellNuo needs access to your microphone to listen to your voice commands.",
"speechRecognitionPermission": "WellNuo uses speech recognition to convert your voice to text for Julia AI."
}
],
"@livekit/react-native-expo-plugin",
"@config-plugins/react-native-webrtc",
"expo-router",
[
"expo-splash-screen",

View File

@ -1,225 +1,17 @@
import { Tabs } from 'expo-router';
import React, { useCallback, useEffect, useRef } from 'react';
import { Platform, View, AppState, AppStateStatus } from 'react-native';
import React from 'react';
import { Platform } from 'react-native';
import { Feather } from '@expo/vector-icons';
import { useSafeAreaInsets } from 'react-native-safe-area-context';
import { HapticTab } from '@/components/haptic-tab';
import { VoiceFAB } from '@/components/VoiceFAB';
import { AppColors } from '@/constants/theme';
import { useColorScheme } from '@/hooks/use-color-scheme';
import { useVoiceCall } from '@/contexts/VoiceCallContext';
import { useVoice } from '@/contexts/VoiceContext';
import { useSpeechRecognition } from '@/hooks/useSpeechRecognition';
export default function TabLayout() {
const colorScheme = useColorScheme();
const isDark = colorScheme === 'dark';
const insets = useSafeAreaInsets();
// VoiceFAB uses VoiceCallContext internally to hide when call is active
useVoiceCall(); // Ensure context is available
// Voice context for listening mode toggle and TTS interruption
const {
isListening,
isSpeaking,
status,
startSession,
stopSession,
interruptIfSpeaking,
setTranscript,
setPartialTranscript,
sendTranscript,
} = useVoice();
// Track whether session is active (listening mode on, even during TTS)
const sessionActiveRef = useRef(false);
// Track if we need to restart STT after it ends during active session
const shouldRestartSTTRef = useRef(false);
// Track pending transcript from interruption (to send after TTS stops)
const pendingInterruptTranscriptRef = useRef<string | null>(null);
// Callback for voice detection - interrupt TTS when user speaks
const handleVoiceDetected = useCallback(() => {
// Interrupt TTS when user starts speaking during 'speaking' state
if (status === 'speaking' || isSpeaking) {
console.log('[TabLayout] Voice detected during TTS playback - INTERRUPTING Julia');
const wasInterrupted = interruptIfSpeaking();
if (wasInterrupted) {
console.log('[TabLayout] TTS interrupted successfully, now listening to user');
}
}
}, [status, isSpeaking, interruptIfSpeaking]);
// Callback when STT ends - may need to restart if session is still active
const handleSTTEnd = useCallback(() => {
console.log('[TabLayout] STT ended, sessionActive:', sessionActiveRef.current);
// If session is still active (user didn't stop it), we should restart STT
// This ensures STT continues during and after TTS playback
if (sessionActiveRef.current) {
shouldRestartSTTRef.current = true;
}
}, []);
// Callback for STT results
const handleSpeechResult = useCallback((transcript: string, isFinal: boolean) => {
if (isFinal) {
// Check if we're still in speaking mode (user interrupted Julia)
if (isSpeaking || status === 'speaking') {
// Store the transcript to send after TTS fully stops
console.log('[TabLayout] Got final result while TTS playing - storing for after interruption:', transcript);
pendingInterruptTranscriptRef.current = transcript;
} else {
// Normal case: not speaking, send immediately
setTranscript(transcript);
sendTranscript(transcript);
}
} else {
setPartialTranscript(transcript);
}
}, [setTranscript, setPartialTranscript, sendTranscript, isSpeaking, status]);
// Speech recognition with voice detection callback
const {
startListening,
stopListening,
isListening: sttIsListening,
} = useSpeechRecognition({
lang: 'ru-RU',
continuous: true,
interimResults: true,
onVoiceDetected: handleVoiceDetected,
onResult: handleSpeechResult,
onEnd: handleSTTEnd,
});
// Update session active ref when isListening changes
useEffect(() => {
sessionActiveRef.current = isListening;
if (!isListening) {
shouldRestartSTTRef.current = false;
}
}, [isListening]);
// Start/stop STT when voice session starts/stops
useEffect(() => {
if (isListening) {
console.log('[TabLayout] Starting STT for voice session');
startListening();
} else {
console.log('[TabLayout] Stopping STT - session ended');
stopListening();
}
}, [isListening, startListening, stopListening]);
// Restart STT if it ended while session is still active
// This ensures continuous listening even during/after TTS playback
useEffect(() => {
if (shouldRestartSTTRef.current && sessionActiveRef.current && !sttIsListening) {
console.log('[TabLayout] Restarting STT - session still active');
shouldRestartSTTRef.current = false;
// Small delay to ensure clean restart
const timer = setTimeout(() => {
if (sessionActiveRef.current) {
startListening();
}
}, 100);
return () => clearTimeout(timer);
}
}, [sttIsListening, startListening]);
// Track previous status to detect transition from speaking to listening
const prevStatusRef = useRef<typeof status>('idle');
// Auto-restart STT when TTS finishes (status changes from 'speaking' to 'listening')
// Also process any pending transcript from user interruption
useEffect(() => {
const prevStatus = prevStatusRef.current;
prevStatusRef.current = status;
// When transitioning from speaking to listening, handle pending interrupt transcript
if (prevStatus === 'speaking' && status === 'listening' && sessionActiveRef.current) {
console.log('[TabLayout] TTS finished/interrupted - checking for pending transcript');
// Process pending transcript from interruption if any
const pendingTranscript = pendingInterruptTranscriptRef.current;
if (pendingTranscript) {
console.log('[TabLayout] Processing pending interrupt transcript:', pendingTranscript);
pendingInterruptTranscriptRef.current = null;
setTranscript(pendingTranscript);
sendTranscript(pendingTranscript);
}
// Small delay to ensure TTS cleanup is complete, then restart STT
const timer = setTimeout(() => {
if (sessionActiveRef.current && !sttIsListening) {
startListening();
}
}, 200);
return () => clearTimeout(timer);
}
}, [status, sttIsListening, startListening, setTranscript, sendTranscript]);
// ============================================================================
// TAB NAVIGATION PERSISTENCE
// Ensure voice session continues when user switches between tabs.
// The session state is in VoiceContext (root level), but STT may stop due to:
// 1. Native audio session changes
// 2. Tab unmount/remount (though tabs layout doesn't unmount)
// 3. AppState changes (background/foreground)
// ============================================================================
// Monitor and recover STT state during tab navigation
// If session is active but STT stopped unexpectedly, restart it
// IMPORTANT: STT should run DURING TTS playback to detect user interruption!
useEffect(() => {
// Check every 500ms if STT needs to be restarted
const intervalId = setInterval(() => {
// Only act if session should be active (isListening from VoiceContext)
// but STT is not actually listening
// Note: We DO want STT running during 'speaking' to detect interruption!
// Only skip during 'processing' (API call in progress)
if (
sessionActiveRef.current &&
!sttIsListening &&
status !== 'processing'
) {
console.log('[TabLayout] STT watchdog: restarting STT (session active but STT stopped, status:', status, ')');
startListening();
}
}, 500);
return () => clearInterval(intervalId);
}, [sttIsListening, status, startListening]);
// Handle app state changes (background/foreground)
// When app comes back to foreground, restart STT if session was active
useEffect(() => {
const handleAppStateChange = (nextAppState: AppStateStatus) => {
if (nextAppState === 'active' && sessionActiveRef.current) {
// App came to foreground, give it a moment then check STT
// STT should run even during 'speaking' to detect user interruption
setTimeout(() => {
if (sessionActiveRef.current && !sttIsListening && status !== 'processing') {
console.log('[TabLayout] App foregrounded - restarting STT');
startListening();
}
}, 300);
}
};
const subscription = AppState.addEventListener('change', handleAppStateChange);
return () => subscription.remove();
}, [sttIsListening, status, startListening]);
// Handle voice FAB press - toggle listening mode
const handleVoiceFABPress = useCallback(() => {
if (isListening) {
stopSession();
} else {
startSession();
}
}, [isListening, startSession, stopSession]);
// Calculate tab bar height based on safe area
// On iOS with home indicator, insets.bottom is ~34px
@ -232,86 +24,74 @@ export default function TabLayout() {
const tabBarHeight = 60 + bottomPadding; // 60px for content + safe area padding
return (
<View style={{ flex: 1 }}>
<Tabs
screenOptions={{
tabBarActiveTintColor: AppColors.primary,
tabBarInactiveTintColor: isDark ? '#9BA1A6' : '#687076',
tabBarStyle: {
backgroundColor: isDark ? '#151718' : AppColors.background,
borderTopColor: isDark ? '#2D3135' : AppColors.border,
height: tabBarHeight,
paddingBottom: bottomPadding,
paddingTop: 10,
},
tabBarLabelStyle: {
fontSize: 11,
fontWeight: '500',
},
headerShown: false,
tabBarButton: HapticTab,
<Tabs
screenOptions={{
tabBarActiveTintColor: AppColors.primary,
tabBarInactiveTintColor: isDark ? '#9BA1A6' : '#687076',
tabBarStyle: {
backgroundColor: isDark ? '#151718' : AppColors.background,
borderTopColor: isDark ? '#2D3135' : AppColors.border,
height: tabBarHeight,
paddingBottom: bottomPadding,
paddingTop: 10,
},
tabBarLabelStyle: {
fontSize: 11,
fontWeight: '500',
},
headerShown: false,
tabBarButton: HapticTab,
}}
>
<Tabs.Screen
name="index"
options={{
title: 'Dashboard',
tabBarIcon: ({ color, size }) => (
<Feather name="grid" size={22} color={color} />
),
}}
>
<Tabs.Screen
name="index"
options={{
title: 'Dashboard',
tabBarIcon: ({ color, size }) => (
<Feather name="grid" size={22} color={color} />
),
}}
/>
{/* Hide old dashboard - now index shows WebView dashboard */}
<Tabs.Screen
name="dashboard"
options={{
href: null,
}}
/>
{/* Chat with Julia AI */}
<Tabs.Screen
name="chat"
options={{
title: 'Julia',
tabBarIcon: ({ color, size }) => (
<Feather name="message-circle" size={22} color={color} />
),
}}
/>
<Tabs.Screen
name="profile"
options={{
title: 'Profile',
tabBarIcon: ({ color, size }) => (
<Feather name="user" size={22} color={color} />
),
}}
/>
{/* Hide explore tab */}
<Tabs.Screen
name="explore"
options={{
href: null,
}}
/>
{/* Audio Debug - hidden */}
<Tabs.Screen
name="audio-debug"
options={{
href: null,
}}
/>
{/* Beneficiaries - hidden from tab bar but keeps tab bar visible */}
<Tabs.Screen
name="beneficiaries"
options={{
href: null,
}}
/>
</Tabs>
{/* Voice FAB - toggle listening mode */}
<VoiceFAB onPress={handleVoiceFABPress} isListening={isListening} />
</View>
/>
{/* Hide old dashboard - now index shows WebView dashboard */}
<Tabs.Screen
name="dashboard"
options={{
href: null,
}}
/>
{/* Chat with Julia AI */}
<Tabs.Screen
name="chat"
options={{
title: 'Julia',
tabBarIcon: ({ color, size }) => (
<Feather name="message-circle" size={22} color={color} />
),
}}
/>
<Tabs.Screen
name="profile"
options={{
title: 'Profile',
tabBarIcon: ({ color, size }) => (
<Feather name="user" size={22} color={color} />
),
}}
/>
{/* Hide explore tab */}
<Tabs.Screen
name="explore"
options={{
href: null,
}}
/>
{/* Beneficiaries - hidden from tab bar but keeps tab bar visible */}
<Tabs.Screen
name="beneficiaries"
options={{
href: null,
}}
/>
</Tabs>
);
}

View File

@ -1,7 +1,7 @@
/**
* Chat Screen - Text Chat with Julia AI
*
* Clean text chat interface.
* Clean text chat interface with integrated voice calls.
*/
import React, { useState, useCallback, useRef, useEffect } from 'react';
@ -17,18 +17,39 @@ import {
Keyboard,
Platform,
Alert,
Animated,
ScrollView,
} from 'react-native';
import * as Clipboard from 'expo-clipboard';
import { KeyboardAvoidingView } from 'react-native-keyboard-controller';
import { Ionicons } from '@expo/vector-icons';
import { SafeAreaView } from 'react-native-safe-area-context';
import { useRouter, useFocusEffect } from 'expo-router';
import { activateKeepAwakeAsync, deactivateKeepAwake } from 'expo-keep-awake';
import { api } from '@/services/api';
import { useBeneficiary } from '@/contexts/BeneficiaryContext';
import { useVoiceTranscript } from '@/contexts/VoiceTranscriptContext';
import { useTextToSpeech } from '@/hooks/useTextToSpeech';
import { useVoiceCall } from '@/contexts/VoiceCallContext';
import { AppColors, BorderRadius, FontSizes, Spacing } from '@/constants/theme';
import type { Message, Beneficiary } from '@/types';
// LiveKit imports
import {
registerGlobals,
LiveKitRoom,
useVoiceAssistant,
useConnectionState,
useTrackTranscription,
useTracks,
} from '@livekit/react-native';
import { ConnectionState, Track } from 'livekit-client';
import { getToken, type BeneficiaryData } from '@/services/livekitService';
import { useAuth } from '@/contexts/AuthContext';
import { getAvailableAudioOutputs, selectAudioOutput, setAudioOutput } from '@/utils/audioSession';
// Register LiveKit globals (must be called before using LiveKit)
registerGlobals();
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
// WellNuo API credentials (same as julia-agent)
@ -108,22 +129,165 @@ function normalizeQuestion(userMessage: string): string {
return userMessage;
}
// ============================================================================
// Voice Call Transcript Handler (invisible - just captures transcripts)
// ============================================================================
interface VoiceCallTranscriptHandlerProps {
onTranscript: (role: 'user' | 'assistant', text: string) => void;
onDurationUpdate: (seconds: number) => void;
onLog?: (message: string) => void;
}
// Debug log entry type
interface DebugLogEntry {
id: string;
timestamp: string;
level: 'info' | 'warn' | 'error' | 'success';
message: string;
}
function VoiceCallTranscriptHandler({ onTranscript, onDurationUpdate, onLog }: VoiceCallTranscriptHandlerProps) {
const connectionState = useConnectionState();
const { audioTrack, state: agentState } = useVoiceAssistant();
const [callDuration, setCallDuration] = useState(0);
const [lastProcessedId, setLastProcessedId] = useState<string | null>(null);
const prevConnectionStateRef = useRef<ConnectionState | null>(null);
const prevAgentStateRef = useRef<string | null>(null);
// Track all audio tracks for transcription
const tracks = useTracks([Track.Source.Microphone, Track.Source.Unknown], { onlySubscribed: false });
// Get transcription from agent's audio track
const { segments: agentSegments } = useTrackTranscription(audioTrack);
// Get transcription from user's microphone
const localTrack = tracks.find(t => t.participant?.isLocal);
const { segments: userSegments } = useTrackTranscription(localTrack);
// Log connection state changes
useEffect(() => {
if (prevConnectionStateRef.current !== connectionState) {
const msg = `Connection: ${prevConnectionStateRef.current || 'initial'} -> ${connectionState}`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
prevConnectionStateRef.current = connectionState;
}
}, [connectionState, onLog]);
// Log agent state changes
useEffect(() => {
if (agentState && prevAgentStateRef.current !== agentState) {
const msg = `Agent state: ${prevAgentStateRef.current || 'initial'} -> ${agentState}`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
prevAgentStateRef.current = agentState;
}
}, [agentState, onLog]);
// Log audio track info
useEffect(() => {
if (audioTrack) {
// audioTrack may have different properties depending on LiveKit version
const trackInfo = JSON.stringify({
hasTrack: !!audioTrack,
publication: (audioTrack as any)?.publication?.sid || 'no-pub',
trackSid: (audioTrack as any)?.sid || (audioTrack as any)?.trackSid || 'unknown',
});
const msg = `Audio track received: ${trackInfo}`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
}
}, [audioTrack, onLog]);
// Log all tracks
useEffect(() => {
if (tracks.length > 0) {
const trackInfo = tracks.map(t => {
const participant = t.participant?.identity || 'unknown';
const source = t.source || 'unknown';
const isLocal = t.participant?.isLocal ? 'local' : 'remote';
return `${participant}(${isLocal}):${source}`;
}).join(', ');
const msg = `Tracks (${tracks.length}): ${trackInfo}`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
}
}, [tracks, onLog]);
// Process agent transcription
useEffect(() => {
if (agentSegments && agentSegments.length > 0) {
const lastSegment = agentSegments[agentSegments.length - 1];
if (lastSegment && lastSegment.final && lastSegment.id !== lastProcessedId) {
setLastProcessedId(lastSegment.id);
onTranscript('assistant', lastSegment.text);
const msg = `Julia said: "${lastSegment.text}"`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
}
}
}, [agentSegments, lastProcessedId, onTranscript, onLog]);
// Process user transcription
const [lastUserSegmentId, setLastUserSegmentId] = useState<string | null>(null);
useEffect(() => {
if (userSegments && userSegments.length > 0) {
const lastSegment = userSegments[userSegments.length - 1];
if (lastSegment && lastSegment.final && lastSegment.id !== lastUserSegmentId) {
setLastUserSegmentId(lastSegment.id);
onTranscript('user', lastSegment.text);
const msg = `User said: "${lastSegment.text}"`;
console.log('[VoiceCall]', msg);
onLog?.(msg);
}
}
}, [userSegments, lastUserSegmentId, onTranscript, onLog]);
// Call duration timer - use ref to avoid state updates during render
const durationRef = useRef(0);
useEffect(() => {
if (connectionState === ConnectionState.Connected) {
const interval = setInterval(() => {
durationRef.current += 1;
onDurationUpdate(durationRef.current);
}, 1000);
return () => clearInterval(interval);
}
}, [connectionState, onDurationUpdate]);
// Keep screen awake during call
useEffect(() => {
activateKeepAwakeAsync('voice-call');
return () => {
deactivateKeepAwake('voice-call');
};
}, []);
// This component renders nothing - it just handles transcripts
return null;
}
export default function ChatScreen() {
const router = useRouter();
const { currentBeneficiary, setCurrentBeneficiary } = useBeneficiary();
const { transcript, hasNewTranscript, markTranscriptAsShown, getTranscriptAsMessages } = useVoiceTranscript();
// TTS for reading Julia's responses aloud
const { speak, stop: stopTTS, isSpeaking } = useTextToSpeech({
language: 'ru-RU',
rate: 1.0,
});
const { addTranscriptEntry, clearTranscript } = useVoiceTranscript();
const { user } = useAuth();
const {
callState,
startCall,
endCall: endVoiceCallContext,
minimizeCall,
maximizeCall,
updateDuration,
isCallActive,
} = useVoiceCall();
// Helper to create initial message with beneficiary name
const createInitialMessage = useCallback((beneficiaryName?: string | null): Message => ({
id: '1',
role: 'assistant',
content: `Hello! I'm Julia, your AI wellness companion.${beneficiaryName ? `\n\nI'm here to help you monitor ${beneficiaryName}.` : ''}\n\nType a message below to chat with me.`,
content: `Hello! I'm Julia, your AI wellness companion.${beneficiaryName ? `\n\nI'm here to help you monitor ${beneficiaryName}.` : ''}\n\nTap the phone button to start a voice call, or type a message below.`,
timestamp: new Date(),
}), []);
@ -135,6 +299,84 @@ export default function ChatScreen() {
const [messages, setMessages] = useState<Message[]>([createInitialMessage(null)]);
const [sortNewestFirst, setSortNewestFirst] = useState(false);
// Voice call state (local connecting state only)
const [isConnectingVoice, setIsConnectingVoice] = useState(false);
// Debug logs state
const [debugLogs, setDebugLogs] = useState<DebugLogEntry[]>([]);
const [showDebugPanel, setShowDebugPanel] = useState(false);
const debugLogIdRef = useRef(0);
// Add debug log entry
const addDebugLog = useCallback((message: string, level: DebugLogEntry['level'] = 'info') => {
const now = new Date();
const timestamp = now.toLocaleTimeString('en-US', {
hour12: false,
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
}) + '.' + now.getMilliseconds().toString().padStart(3, '0');
const entry: DebugLogEntry = {
id: `log-${++debugLogIdRef.current}`,
timestamp,
level,
message,
};
setDebugLogs(prev => [...prev.slice(-100), entry]); // Keep last 100 logs
}, []);
// Copy logs to clipboard
const copyLogsToClipboard = useCallback(async () => {
const logsText = debugLogs.map(log => `[${log.timestamp}] ${log.level.toUpperCase()}: ${log.message}`).join('\n');
await Clipboard.setStringAsync(logsText);
Alert.alert('Copied', `${debugLogs.length} log entries copied to clipboard`);
}, [debugLogs]);
// Clear debug logs
const clearDebugLogs = useCallback(() => {
setDebugLogs([]);
addDebugLog('Logs cleared', 'info');
}, [addDebugLog]);
// Pulsing animation for active call
const pulseAnim = useRef(new Animated.Value(1)).current;
// Start pulsing animation when call is active
useEffect(() => {
if (isCallActive) {
const pulse = Animated.loop(
Animated.sequence([
Animated.timing(pulseAnim, {
toValue: 1.15,
duration: 600,
useNativeDriver: true,
}),
Animated.timing(pulseAnim, {
toValue: 1,
duration: 600,
useNativeDriver: true,
}),
])
);
pulse.start();
return () => pulse.stop();
} else {
pulseAnim.setValue(1);
}
}, [isCallActive, pulseAnim]);
// Track if we've shown the voice call separator for current call
const [hasShownVoiceSeparator, setHasShownVoiceSeparator] = useState(false);
// Reset separator flag when starting a new call
useEffect(() => {
if (isCallActive && !hasShownVoiceSeparator) {
// Will show separator on first voice message
} else if (!isCallActive) {
setHasShownVoiceSeparator(false);
}
}, [isCallActive]);
const [input, setInput] = useState('');
const [isSending, setIsSending] = useState(false);
const inputRef = useRef('');
@ -187,15 +429,20 @@ export default function ChatScreen() {
old: previousDeploymentIdRef.current,
new: customDeploymentId,
name: deploymentName,
isCallActive,
});
// End any active call
endVoiceCallContext();
// Clear chat with new initial message (use name instead of ID)
setMessages([createInitialMessage(deploymentName)]);
setHasShownVoiceSeparator(false);
// Update ref
previousDeploymentIdRef.current = customDeploymentId;
}
}, [customDeploymentId, deploymentName, createInitialMessage]);
}, [customDeploymentId, deploymentName, createInitialMessage, isCallActive, endVoiceCallContext]);
// Update initial message when deploymentName is loaded (but only if chat has just the initial message)
useEffect(() => {
@ -204,26 +451,6 @@ export default function ChatScreen() {
}
}, [deploymentName, createInitialMessage]);
// Add voice transcript messages to chat when new ones arrive
useEffect(() => {
if (hasNewTranscript && transcript.length > 0) {
const voiceMessages = getTranscriptAsMessages();
if (voiceMessages.length > 0) {
setMessages(prev => {
// Filter out messages that are already in the chat (by id)
const existingIds = new Set(prev.map(m => m.id));
const newMessages = voiceMessages.filter(m => !existingIds.has(m.id));
if (newMessages.length > 0) {
console.log('[Chat] Adding', newMessages.length, 'voice messages to chat');
return [...prev, ...newMessages];
}
return prev;
});
}
markTranscriptAsShown();
}
}, [hasNewTranscript, transcript, getTranscriptAsMessages, markTranscriptAsShown]);
// Load beneficiaries
const loadBeneficiaries = useCallback(async () => {
setLoadingBeneficiaries(true);
@ -289,6 +516,161 @@ export default function ChatScreen() {
setShowBeneficiaryPicker(false);
}, [setCurrentBeneficiary]);
// ============================================================================
// Voice Call Functions
// ============================================================================
// Start voice call
const startVoiceCall = useCallback(async () => {
if (isConnectingVoice || isCallActive) return;
setIsConnectingVoice(true);
addDebugLog('Starting voice call...', 'info');
console.log('[Chat] Starting voice call...');
try {
// Build beneficiary data for the agent
// Priority: customDeploymentId from settings > currentBeneficiary > first beneficiary > fallback
const beneficiaryData: BeneficiaryData = {
deploymentId: customDeploymentId || currentBeneficiary?.id?.toString() || beneficiaries[0]?.id?.toString() || '21',
beneficiaryNamesDict: {},
};
addDebugLog(`Deployment ID: ${beneficiaryData.deploymentId}`, 'info');
// Add names dict if not in single deployment mode
if (!SINGLE_DEPLOYMENT_MODE) {
beneficiaries.forEach(b => {
beneficiaryData.beneficiaryNamesDict[b.id.toString()] = b.name;
});
}
// Get LiveKit token
addDebugLog('Requesting LiveKit token...', 'info');
const userIdStr = user?.user_id?.toString() || 'user-' + Date.now();
const tokenResponse = await getToken(userIdStr, beneficiaryData);
if (!tokenResponse.success || !tokenResponse.data) {
throw new Error(tokenResponse.error || 'Failed to get voice token');
}
addDebugLog(`Token received! Room: ${tokenResponse.data.roomName}`, 'success');
addDebugLog(`WS URL: ${tokenResponse.data.wsUrl}`, 'info');
console.log('[Chat] Got voice token, connecting to room:', tokenResponse.data.roomName);
// Add call start message to chat
const callStartMessage: Message = {
id: `call-start-${Date.now()}`,
role: 'assistant',
content: 'Voice call started',
timestamp: new Date(),
isSystem: true,
};
setMessages(prev => [...prev, callStartMessage]);
// Clear previous transcript and start call via context
clearTranscript();
addDebugLog('Calling startCall with token and wsUrl...', 'info');
startCall({
token: tokenResponse.data.token,
wsUrl: tokenResponse.data.wsUrl,
beneficiaryName: currentBeneficiary?.name,
beneficiaryId: currentBeneficiary?.id?.toString(),
});
addDebugLog('startCall called, waiting for LiveKitRoom to connect...', 'success');
} catch (error) {
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
addDebugLog(`Voice call error: ${errorMsg}`, 'error');
console.error('[Chat] Voice call error:', error);
Alert.alert(
'Voice Call Error',
error instanceof Error ? error.message : 'Failed to start voice call'
);
} finally {
setIsConnectingVoice(false);
}
}, [isConnectingVoice, isCallActive, currentBeneficiary, beneficiaries, user, clearTranscript, startCall, customDeploymentId, addDebugLog]);
// End voice call and log to chat
const endVoiceCall = useCallback(() => {
console.log('[Chat] Ending voice call...');
// Add call end message to chat with duration
const duration = callState.callDuration;
const minutes = Math.floor(duration / 60);
const seconds = duration % 60;
const durationStr = `${minutes}:${seconds.toString().padStart(2, '0')}`;
const callEndMessage: Message = {
id: `call-end-${Date.now()}-${Math.random().toString(36).slice(2)}`,
role: 'assistant',
content: `Call ended (${durationStr})`,
timestamp: new Date(),
isSystem: true,
};
setMessages(prev => [...prev, callEndMessage]);
setHasShownVoiceSeparator(false);
endVoiceCallContext();
}, [endVoiceCallContext, callState.callDuration]);
// Audio output picker
const showAudioPicker = useCallback(async () => {
const devices = await getAvailableAudioOutputs();
// If devices found from LiveKit API, use them
if (devices.length > 0) {
const buttons: any[] = devices.map(device => ({
text: device.name,
onPress: () => selectAudioOutput(device.id),
}));
buttons.push({ text: 'Cancel', style: 'cancel' });
Alert.alert('Audio Output', 'Select audio device:', buttons);
return;
}
// Fallback for Android (and iOS if no devices found)
// Show simple Speaker/Earpiece toggle using setAudioOutput()
Alert.alert(
'Audio Output',
'Select audio output:',
[
{
text: '🔊 Speaker',
onPress: () => setAudioOutput(true),
},
{
text: '📱 Earpiece',
onPress: () => setAudioOutput(false),
},
{ text: 'Cancel', style: 'cancel' },
]
);
}, []);
// Handle voice transcript entries - add to chat in real-time
const handleVoiceTranscript = useCallback((role: 'user' | 'assistant', text: string) => {
if (!text.trim()) return;
// Create voice message and add to chat immediately
const voiceMessage: Message = {
id: `voice-${Date.now()}-${Math.random().toString(36).slice(2)}`,
role,
content: text.trim(),
timestamp: new Date(),
isVoice: true,
};
setMessages(prev => [...prev, voiceMessage]);
// Scroll to latest message (respects sort mode)
setTimeout(() => {
scrollToLatestMessage(true);
}, 100);
// Also store in transcript context for persistence
addTranscriptEntry(role, text);
}, [hasShownVoiceSeparator, addTranscriptEntry, scrollToLatestMessage]);
// Cached API token for WellNuo
const apiTokenRef = useRef<string | null>(null);
@ -383,17 +765,13 @@ export default function ChatScreen() {
const data = await response.json();
if (data.ok && data.response?.body) {
const responseText = data.response.body;
const assistantMessage: Message = {
id: (Date.now() + 1).toString(),
role: 'assistant',
content: responseText,
content: data.response.body,
timestamp: new Date(),
};
setMessages(prev => [...prev, assistantMessage]);
// Speak the response using TTS
speak(responseText);
} else {
// Token might be expired, clear and retry once
if (data.status === '401 Unauthorized') {
@ -413,7 +791,7 @@ export default function ChatScreen() {
} finally {
setIsSending(false);
}
}, [isSending, getWellNuoToken, customDeploymentId, currentBeneficiary, beneficiaries, speak]);
}, [isSending, getWellNuoToken, customDeploymentId, currentBeneficiary, beneficiaries]);
// Render message bubble
const renderMessage = ({ item }: { item: Message }) => {
@ -421,7 +799,7 @@ export default function ChatScreen() {
const isVoice = item.isVoice;
const isSystem = item.isSystem;
// System messages
// System messages (like "Voice Call Transcript" separator)
if (isSystem) {
return (
<View style={styles.systemMessageContainer}>
@ -442,7 +820,12 @@ export default function ChatScreen() {
<Text style={styles.avatarText}>J</Text>
</View>
)}
<View style={[styles.messageBubble, isUser ? styles.userBubble : styles.assistantBubble]}>
<View style={[styles.messageBubble, isUser ? styles.userBubble : styles.assistantBubble, isVoice && styles.voiceBubble]}>
{isVoice && (
<View style={styles.voiceIndicator}>
<Text style={styles.voiceIndicatorEmoji}>🎤</Text>
</View>
)}
<Text style={[styles.messageText, isUser ? styles.userMessageText : styles.assistantMessageText]}>
{item.content}
</Text>
@ -472,15 +855,6 @@ export default function ChatScreen() {
</Text>
</View>
</View>
{/* TTS Stop button - only visible when speaking */}
{isSpeaking && (
<TouchableOpacity
style={[styles.headerButton, styles.speakingButton]}
onPress={stopTTS}
>
<Ionicons name="volume-high" size={22} color={AppColors.primary} />
</TouchableOpacity>
)}
<TouchableOpacity
style={styles.headerButton}
onPress={() => setSortNewestFirst(prev => !prev)}
@ -494,7 +868,6 @@ export default function ChatScreen() {
<TouchableOpacity
style={styles.headerButton}
onPress={() => {
stopTTS(); // Stop TTS when clearing chat
Alert.alert(
'Clear Chat',
'Are you sure you want to clear all messages?',
@ -508,7 +881,7 @@ export default function ChatScreen() {
{
id: '1',
role: 'assistant',
content: 'Hello! I\'m Julia, your AI wellness assistant. Type a message below to chat with me.',
content: 'Hello! I\'m Julia, your AI wellness assistant. You can type a message or tap the phone button to start a voice call.',
timestamp: new Date(),
},
]);
@ -578,6 +951,53 @@ export default function ChatScreen() {
</View>
</Modal>
{/* Debug Logs Modal */}
<Modal
visible={showDebugPanel}
transparent
animationType="slide"
onRequestClose={() => setShowDebugPanel(false)}
>
<View style={styles.modalOverlay}>
<View style={[styles.modalContent, styles.debugModalContent]}>
<View style={styles.modalHeader}>
<Text style={styles.modalTitle}>Debug Logs ({debugLogs.length})</Text>
<View style={styles.debugHeaderButtons}>
<TouchableOpacity style={styles.debugHeaderBtn} onPress={copyLogsToClipboard}>
<Ionicons name="copy-outline" size={20} color={AppColors.primary} />
</TouchableOpacity>
<TouchableOpacity style={styles.debugHeaderBtn} onPress={clearDebugLogs}>
<Ionicons name="trash-outline" size={20} color={AppColors.error} />
</TouchableOpacity>
<TouchableOpacity onPress={() => setShowDebugPanel(false)}>
<Ionicons name="close" size={24} color={AppColors.textPrimary} />
</TouchableOpacity>
</View>
</View>
<ScrollView style={styles.debugLogsContainer}>
{debugLogs.length === 0 ? (
<Text style={styles.debugEmptyText}>No logs yet. Start a voice call to see logs.</Text>
) : (
debugLogs.map(log => (
<View key={log.id} style={styles.debugLogEntry}>
<Text style={styles.debugTimestamp}>{log.timestamp}</Text>
<Text style={[
styles.debugMessage,
log.level === 'error' && styles.debugError,
log.level === 'warn' && styles.debugWarn,
log.level === 'success' && styles.debugSuccess,
]}>
{log.message}
</Text>
</View>
))
)}
</ScrollView>
</View>
</View>
</Modal>
{/* Messages */}
<KeyboardAvoidingView
style={styles.chatContainer}
@ -609,6 +1029,47 @@ export default function ChatScreen() {
{/* Input */}
<View style={styles.inputContainer}>
{/* Voice Call Button - becomes pulsing bubble during call */}
<Animated.View style={{ transform: [{ scale: pulseAnim }] }}>
<TouchableOpacity
style={[
styles.voiceButton,
isConnectingVoice && styles.voiceButtonConnecting,
isCallActive && styles.voiceButtonActive,
]}
onPress={isCallActive ? endVoiceCall : startVoiceCall}
disabled={isConnectingVoice}
>
{isConnectingVoice ? (
<ActivityIndicator size="small" color={AppColors.primary} />
) : isCallActive ? (
<View style={styles.callActiveIndicator}>
<Ionicons name="call" size={20} color={AppColors.white} />
</View>
) : (
<Ionicons name="call" size={20} color={AppColors.primary} />
)}
</TouchableOpacity>
</Animated.View>
{/* Call duration badge */}
{isCallActive && (
<View style={styles.callDurationBadge}>
<Text style={styles.callDurationText}>
{Math.floor(callState.callDuration / 60).toString().padStart(2, '0')}:
{(callState.callDuration % 60).toString().padStart(2, '0')}
</Text>
</View>
)}
{/* Audio output button - only during active call */}
{isCallActive && (
<TouchableOpacity
style={styles.audioButton}
onPress={showAudioPicker}
>
<Ionicons name="volume-high" size={20} color={AppColors.primary} />
</TouchableOpacity>
)}
<TextInput
style={styles.input}
placeholder="Type a message..."
@ -632,6 +1093,38 @@ export default function ChatScreen() {
</TouchableOpacity>
</View>
</KeyboardAvoidingView>
{/* Invisible LiveKit Room - runs in background during call */}
{isCallActive && callState.token && callState.wsUrl && (
<LiveKitRoom
serverUrl={callState.wsUrl}
token={callState.token}
connect={true}
audio={true}
video={false}
onConnected={() => {
console.log('[Chat] LiveKit connected');
addDebugLog('LiveKitRoom: CONNECTED to server!', 'success');
}}
onDisconnected={() => {
addDebugLog('LiveKitRoom: DISCONNECTED', 'warn');
endVoiceCall();
}}
onError={(error) => {
const errorMsg = error?.message || 'Unknown error';
addDebugLog(`LiveKitRoom ERROR: ${errorMsg}`, 'error');
console.error('[Chat] LiveKit error:', error);
Alert.alert('Voice Call Error', error.message);
endVoiceCall();
}}
>
<VoiceCallTranscriptHandler
onTranscript={handleVoiceTranscript}
onDurationUpdate={updateDuration}
onLog={addDebugLog}
/>
</LiveKitRoom>
)}
</SafeAreaView>
);
}
@ -687,10 +1180,6 @@ const styles = StyleSheet.create({
padding: Spacing.xs,
marginLeft: Spacing.sm,
},
speakingButton: {
backgroundColor: AppColors.primaryLight || '#E3F2FD',
borderRadius: BorderRadius.full,
},
chatContainer: {
flex: 1,
},
@ -774,6 +1263,59 @@ const styles = StyleSheet.create({
maxHeight: 100,
marginRight: Spacing.sm,
},
voiceButton: {
width: 44,
height: 44,
borderRadius: BorderRadius.full,
backgroundColor: AppColors.surface,
justifyContent: 'center',
alignItems: 'center',
marginRight: Spacing.sm,
borderWidth: 1,
borderColor: AppColors.primary,
},
voiceButtonConnecting: {
borderColor: AppColors.success,
backgroundColor: 'rgba(90, 200, 168, 0.1)',
},
voiceButtonActive: {
backgroundColor: AppColors.error,
borderColor: AppColors.error,
},
audioButton: {
width: 44,
height: 44,
borderRadius: 22,
backgroundColor: AppColors.surface,
justifyContent: 'center',
alignItems: 'center',
marginRight: Spacing.sm,
borderWidth: 1,
borderColor: AppColors.primary,
},
callActiveIndicator: {
width: '100%',
height: '100%',
justifyContent: 'center',
alignItems: 'center',
},
callDurationBadge: {
position: 'absolute',
left: 32,
top: -8,
backgroundColor: AppColors.error,
paddingHorizontal: 6,
paddingVertical: 2,
borderRadius: 8,
minWidth: 42,
alignItems: 'center',
},
callDurationText: {
fontSize: 10,
fontWeight: '600',
color: AppColors.white,
fontVariant: ['tabular-nums'],
},
sendButton: {
width: 44,
height: 44,
@ -895,6 +1437,19 @@ const styles = StyleSheet.create({
fontWeight: '500',
color: AppColors.textPrimary,
},
// Voice message styles
voiceBubble: {
borderWidth: 1,
borderColor: 'rgba(59, 130, 246, 0.3)',
},
voiceIndicator: {
position: 'absolute',
top: 6,
right: 6,
},
voiceIndicatorEmoji: {
fontSize: 10,
},
// System message styles
systemMessageContainer: {
flexDirection: 'row',
@ -921,4 +1476,59 @@ const styles = StyleSheet.create({
color: AppColors.textMuted,
marginLeft: 4,
},
// Debug panel styles
debugButtonActive: {
backgroundColor: 'rgba(59, 130, 246, 0.1)',
},
debugModalContent: {
maxHeight: '80%',
},
debugHeaderButtons: {
flexDirection: 'row',
alignItems: 'center',
gap: Spacing.md,
},
debugHeaderBtn: {
padding: Spacing.xs,
},
debugLogsContainer: {
flex: 1,
padding: Spacing.sm,
backgroundColor: '#1a1a2e',
},
debugEmptyText: {
color: AppColors.textMuted,
textAlign: 'center',
padding: Spacing.lg,
fontSize: FontSizes.sm,
},
debugLogEntry: {
flexDirection: 'row',
paddingVertical: 3,
borderBottomWidth: 1,
borderBottomColor: 'rgba(255,255,255,0.05)',
},
debugTimestamp: {
color: '#6b7280',
fontSize: 11,
fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
marginRight: Spacing.sm,
minWidth: 90,
},
debugMessage: {
color: '#e5e7eb',
fontSize: 11,
fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
flex: 1,
flexWrap: 'wrap',
},
debugError: {
color: '#ef4444',
},
debugWarn: {
color: '#f59e0b',
},
debugSuccess: {
color: '#10b981',
},
});

View File

@ -1,3 +1,6 @@
// WebRTC globals are now registered in useLiveKitRoom hook
// before any LiveKit classes are loaded.
import { useEffect } from 'react';
import { DarkTheme, DefaultTheme, ThemeProvider } from '@react-navigation/native';
import { Stack, router, useSegments } from 'expo-router';
@ -12,7 +15,6 @@ import { AuthProvider, useAuth } from '@/contexts/AuthContext';
import { BeneficiaryProvider } from '@/contexts/BeneficiaryContext';
import { VoiceTranscriptProvider } from '@/contexts/VoiceTranscriptContext';
import { VoiceCallProvider } from '@/contexts/VoiceCallContext';
import { VoiceProvider } from '@/contexts/VoiceContext';
import { LoadingSpinner } from '@/components/ui/LoadingSpinner';
import { FloatingCallBubble } from '@/components/FloatingCallBubble';
@ -68,9 +70,7 @@ export default function RootLayout() {
<BeneficiaryProvider>
<VoiceTranscriptProvider>
<VoiceCallProvider>
<VoiceProvider>
<RootLayoutNav />
</VoiceProvider>
<RootLayoutNav />
</VoiceCallProvider>
</VoiceTranscriptProvider>
</BeneficiaryProvider>

View File

@ -1,222 +0,0 @@
/**
* Voice Floating Action Button Component
*
* A floating action button for toggling voice listening mode.
* Tap to start/stop listening.
* Hidden when a call is already active.
*/
import React, { useRef, useEffect } from 'react';
import {
StyleSheet,
TouchableOpacity,
Animated,
ViewStyle,
} from 'react-native';
import { Ionicons } from '@expo/vector-icons';
import { useSafeAreaInsets } from 'react-native-safe-area-context';
import * as Haptics from 'expo-haptics';
import { AppColors, BorderRadius } from '@/constants/theme';
import { useVoiceCall } from '@/contexts/VoiceCallContext';
interface VoiceFABProps {
onPress: () => void;
style?: ViewStyle;
disabled?: boolean;
isListening?: boolean;
}
const FAB_SIZE = 56;
export function VoiceFAB({ onPress, style, disabled = false, isListening = false }: VoiceFABProps) {
const { isCallActive } = useVoiceCall();
const insets = useSafeAreaInsets();
// Animation values
const scale = useRef(new Animated.Value(1)).current;
const opacity = useRef(new Animated.Value(1)).current;
const pulseScale = useRef(new Animated.Value(1)).current;
const pulseOpacity = useRef(new Animated.Value(0)).current;
// Hide FAB when call is active
useEffect(() => {
if (isCallActive) {
Animated.parallel([
Animated.timing(scale, {
toValue: 0,
duration: 200,
useNativeDriver: true,
}),
Animated.timing(opacity, {
toValue: 0,
duration: 200,
useNativeDriver: true,
}),
]).start();
} else {
Animated.parallel([
Animated.spring(scale, {
toValue: 1,
friction: 5,
tension: 40,
useNativeDriver: true,
}),
Animated.timing(opacity, {
toValue: 1,
duration: 200,
useNativeDriver: true,
}),
]).start();
}
}, [isCallActive, scale, opacity]);
// Pulse animation when listening
useEffect(() => {
if (isListening && !isCallActive) {
// Start pulsing animation
const pulseAnimation = Animated.loop(
Animated.sequence([
Animated.parallel([
Animated.timing(pulseScale, {
toValue: 1.8,
duration: 1000,
useNativeDriver: true,
}),
Animated.timing(pulseOpacity, {
toValue: 0,
duration: 1000,
useNativeDriver: true,
}),
]),
Animated.parallel([
Animated.timing(pulseScale, {
toValue: 1,
duration: 0,
useNativeDriver: true,
}),
Animated.timing(pulseOpacity, {
toValue: 0.6,
duration: 0,
useNativeDriver: true,
}),
]),
])
);
pulseAnimation.start();
return () => {
pulseAnimation.stop();
pulseScale.setValue(1);
pulseOpacity.setValue(0);
};
} else {
pulseScale.setValue(1);
pulseOpacity.setValue(0);
}
}, [isListening, isCallActive, pulseScale, pulseOpacity]);
// Press animation with haptic feedback
const handlePressIn = () => {
Haptics.impactAsync(Haptics.ImpactFeedbackStyle.Medium);
Animated.spring(scale, {
toValue: 0.9,
friction: 5,
useNativeDriver: true,
}).start();
};
const handlePressOut = () => {
Animated.spring(scale, {
toValue: 1,
friction: 5,
useNativeDriver: true,
}).start();
};
// Don't render if call is active
if (isCallActive) {
return null;
}
return (
<Animated.View
style={[
styles.container,
{
bottom: insets.bottom + 80, // Above tab bar
transform: [{ scale }],
opacity,
},
style,
]}
>
{/* Pulse ring when listening */}
{isListening && (
<Animated.View
style={[
styles.pulseRing,
{
transform: [{ scale: pulseScale }],
opacity: pulseOpacity,
},
]}
/>
)}
<TouchableOpacity
style={[
styles.fab,
isListening && styles.fabListening,
disabled && styles.fabDisabled,
]}
onPress={onPress}
onPressIn={handlePressIn}
onPressOut={handlePressOut}
disabled={disabled}
activeOpacity={0.9}
>
<Ionicons
name={isListening ? 'mic' : 'mic-outline'}
size={28}
color={disabled ? AppColors.textMuted : AppColors.white}
/>
</TouchableOpacity>
</Animated.View>
);
}
const styles = StyleSheet.create({
container: {
position: 'absolute',
left: 0,
right: 0,
alignItems: 'center',
zIndex: 100,
},
pulseRing: {
position: 'absolute',
width: FAB_SIZE,
height: FAB_SIZE,
borderRadius: BorderRadius.full,
backgroundColor: AppColors.error,
},
fab: {
width: FAB_SIZE,
height: FAB_SIZE,
borderRadius: BorderRadius.full,
backgroundColor: AppColors.success,
justifyContent: 'center',
alignItems: 'center',
shadowColor: '#000',
shadowOffset: { width: 0, height: 4 },
shadowOpacity: 0.3,
shadowRadius: 8,
elevation: 8,
},
fabListening: {
backgroundColor: AppColors.error,
},
fabDisabled: {
backgroundColor: AppColors.surface,
shadowOpacity: 0.1,
},
});

View File

@ -12,7 +12,7 @@ interface VoiceCallState {
isActive: boolean;
// Whether the call UI is minimized (showing bubble instead of full screen)
isMinimized: boolean;
// Voice service connection details
// LiveKit connection details
token: string | undefined;
wsUrl: string | undefined;
// Call metadata

View File

@ -1,410 +0,0 @@
/**
* Voice Context - Local STT/TTS integration with WellNuo API
*
* Provides voice session management:
* - STT (Speech-to-Text) via expo-speech-recognition
* - API calls to WellNuo ask_wellnuo_ai
* - TTS (Text-to-Speech) via expo-speech
*
* Flow: User speaks → STT → API → Response → TTS → Continue listening
*/
import React, {
createContext,
useContext,
useState,
useCallback,
useRef,
ReactNode,
} from 'react';
import * as Speech from 'expo-speech';
import { api } from '@/services/api';
import { useVoiceTranscript } from './VoiceTranscriptContext';
// WellNuo API configuration (same as chat.tsx)
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
const WELLNUO_USER = 'anandk';
const WELLNUO_PASSWORD = 'anandk_8';
// Single deployment mode - sends only deployment_id (no beneficiary_names_dict)
const SINGLE_DEPLOYMENT_MODE = true;
// Keywords for question normalization (same as chat.tsx)
const STATUS_KEYWORDS = [
/\bhow\s+is\b/i,
/\bhow'?s\b/i,
/\bhow\s+are\b/i,
/\btell\s+me\s+about\b/i,
/\bwhat'?s\s+up\s+with\b/i,
/\bupdate\s+on\b/i,
/\bstatus\b/i,
/\bdoing\b/i,
/\bfeeling\b/i,
/\bcheck\s+on\b/i,
/\bis\s+\w+\s+okay\b/i,
/\bis\s+\w+\s+alright\b/i,
/\bis\s+\w+\s+fine\b/i,
/\bokay\?\b/i,
/\balright\?\b/i,
];
const SUBJECT_KEYWORDS = [
/\bdad\b/i,
/\bfather\b/i,
/\bferdinand\b/i,
/\bhim\b/i,
/\bhe\b/i,
/\bmy\s+dad\b/i,
/\bmy\s+father\b/i,
/\bthe\s+patient\b/i,
/\bloved\s+one\b/i,
/\bparent\b/i,
/\bgrandpa\b/i,
/\bgrandfather\b/i,
];
/**
* Normalize question for WellNuo API (same logic as chat.tsx)
*/
function normalizeQuestion(userMessage: string): string {
const msgLower = userMessage.toLowerCase().trim();
const isStatusQuery = STATUS_KEYWORDS.some((pattern) => pattern.test(msgLower));
const isAboutRecipient = SUBJECT_KEYWORDS.some((pattern) => pattern.test(msgLower));
if (isStatusQuery && isAboutRecipient) {
console.log(`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing'`);
return 'how is dad doing';
}
if (isStatusQuery && !isAboutRecipient) {
console.log(
`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing' (assumed recipient)`
);
return 'how is dad doing';
}
console.log(`[VoiceContext] No normalization applied to: '${userMessage}'`);
return userMessage;
}
export type VoiceStatus = 'idle' | 'listening' | 'processing' | 'speaking';
interface VoiceContextValue {
// Current status of the voice session
status: VoiceStatus;
// Whether voice session is active (not idle)
isActive: boolean;
// Whether STT is currently listening
isListening: boolean;
// Whether TTS is currently speaking
isSpeaking: boolean;
// Whether processing API request
isProcessing: boolean;
// Current/last transcript from STT
transcript: string;
// Partial transcript (real-time preview)
partialTranscript: string;
// Last API response
lastResponse: string | null;
// Error message if any
error: string | null;
// Start voice session (begin listening)
startSession: () => void;
// Stop voice session
stopSession: () => void;
// Send transcript to API and get response with TTS
// Called automatically when STT detects speech end, or manually
sendTranscript: (text: string) => Promise<string | null>;
// Update transcript from external STT hook
setTranscript: (text: string) => void;
setPartialTranscript: (text: string) => void;
// Set status from external STT/TTS hooks
setStatus: (status: VoiceStatus) => void;
setIsListening: (listening: boolean) => void;
setIsSpeaking: (speaking: boolean) => void;
// Speak text using TTS
speak: (text: string) => Promise<void>;
// Stop TTS
stopSpeaking: () => void;
// Interrupt TTS if speaking (call when user starts talking)
interruptIfSpeaking: () => boolean;
}
const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
export function VoiceProvider({ children }: { children: ReactNode }) {
const [status, setStatus] = useState<VoiceStatus>('idle');
const [transcript, setTranscript] = useState('');
const [partialTranscript, setPartialTranscript] = useState('');
const [lastResponse, setLastResponse] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
// Voice transcript context for chat display
const { addTranscriptEntry } = useVoiceTranscript();
// API token cache
const apiTokenRef = useRef<string | null>(null);
// Deployment ID from settings
const deploymentIdRef = useRef<string | null>(null);
// Load deployment ID on mount
React.useEffect(() => {
const loadDeploymentId = async () => {
const savedId = await api.getDeploymentId();
deploymentIdRef.current = savedId;
console.log('[VoiceContext] Loaded deployment ID:', savedId);
};
loadDeploymentId();
}, []);
/**
* Get WellNuo API token (same as chat.tsx)
*/
const getWellNuoToken = useCallback(async (): Promise<string> => {
if (apiTokenRef.current) {
return apiTokenRef.current;
}
const nonce = Math.floor(Math.random() * 1000000).toString();
const response = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams({
function: 'credentials',
clientId: 'MA_001',
user_name: WELLNUO_USER,
ps: WELLNUO_PASSWORD,
nonce: nonce,
}).toString(),
});
const data = await response.json();
if (data.status === '200 OK' && data.access_token) {
apiTokenRef.current = data.access_token;
console.log('[VoiceContext] WellNuo token obtained');
return data.access_token;
}
throw new Error('Failed to authenticate with WellNuo API');
}, []);
/**
* Send transcript to WellNuo API and speak the response
*/
const sendTranscript = useCallback(
async (text: string): Promise<string | null> => {
const trimmedText = text.trim();
if (!trimmedText) {
console.log('[VoiceContext] Empty transcript, skipping API call');
return null;
}
console.log('[VoiceContext] Sending transcript to API:', trimmedText);
setStatus('processing');
setError(null);
// Add user message to transcript for chat display
addTranscriptEntry('user', trimmedText);
try {
// Get API token
const token = await getWellNuoToken();
// Normalize question
const normalizedQuestion = normalizeQuestion(trimmedText);
// Get deployment ID
const deploymentId = deploymentIdRef.current || '21';
// Build request params
const requestParams: Record<string, string> = {
function: 'ask_wellnuo_ai',
clientId: 'MA_001',
user_name: WELLNUO_USER,
token: token,
question: normalizedQuestion,
deployment_id: deploymentId,
};
// Only add beneficiary_names_dict if NOT in single deployment mode
if (!SINGLE_DEPLOYMENT_MODE) {
// For full app, would include beneficiary names dict
// Currently single deployment mode only
}
const response = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams(requestParams).toString(),
});
const data = await response.json();
if (data.ok && data.response?.body) {
const responseText = data.response.body;
console.log('[VoiceContext] API response:', responseText.slice(0, 100) + '...');
setLastResponse(responseText);
// Add Julia's response to transcript for chat display
addTranscriptEntry('assistant', responseText);
// Speak the response
await speak(responseText);
return responseText;
} else {
// Token might be expired
if (data.status === '401 Unauthorized') {
apiTokenRef.current = null;
throw new Error('Session expired, please try again');
}
throw new Error(data.message || 'Could not get response');
}
} catch (err) {
const errorMsg = err instanceof Error ? err.message : 'Unknown error';
console.error('[VoiceContext] API error:', errorMsg);
setError(errorMsg);
setStatus('idle');
return null;
}
},
[getWellNuoToken, addTranscriptEntry]
);
/**
* Interrupt TTS when user starts speaking
* Call this from the STT hook when voice activity is detected
*/
const interruptIfSpeaking = useCallback(() => {
if (isSpeaking) {
console.log('[VoiceContext] User interrupted - stopping TTS');
Speech.stop();
setIsSpeaking(false);
setStatus('listening');
return true;
}
return false;
}, [isSpeaking]);
/**
* Speak text using TTS
*/
const speak = useCallback(async (text: string): Promise<void> => {
if (!text.trim()) return;
console.log('[VoiceContext] Speaking:', text.slice(0, 50) + '...');
setStatus('speaking');
setIsSpeaking(true);
return new Promise((resolve) => {
Speech.speak(text, {
language: 'en-US',
rate: 0.9,
pitch: 1.0,
onStart: () => {
console.log('[VoiceContext] TTS started');
},
onDone: () => {
console.log('[VoiceContext] TTS completed');
setIsSpeaking(false);
// Return to listening state after speaking (if session is active)
setStatus('listening');
resolve();
},
onError: (error) => {
console.error('[VoiceContext] TTS error:', error);
setIsSpeaking(false);
setStatus('listening');
resolve();
},
onStopped: () => {
console.log('[VoiceContext] TTS stopped (interrupted)');
setIsSpeaking(false);
setStatus('listening');
resolve();
},
});
});
}, []);
/**
* Stop TTS playback
*/
const stopSpeaking = useCallback(() => {
Speech.stop();
setIsSpeaking(false);
}, []);
/**
* Start voice session
*/
const startSession = useCallback(() => {
console.log('[VoiceContext] Starting voice session');
setStatus('listening');
setIsListening(true);
setError(null);
setTranscript('');
setPartialTranscript('');
}, []);
/**
* Stop voice session
*/
const stopSession = useCallback(() => {
console.log('[VoiceContext] Stopping voice session');
Speech.stop();
setStatus('idle');
setIsListening(false);
setIsSpeaking(false);
setError(null);
}, []);
// Computed values
const isActive = status !== 'idle';
const isProcessing = status === 'processing';
return (
<VoiceContext.Provider
value={{
status,
isActive,
isListening,
isSpeaking,
isProcessing,
transcript,
partialTranscript,
lastResponse,
error,
startSession,
stopSession,
sendTranscript,
setTranscript,
setPartialTranscript,
setStatus,
setIsListening,
setIsSpeaking,
speak,
stopSpeaking,
interruptIfSpeaking,
}}
>
{children}
</VoiceContext.Provider>
);
}
export function useVoice() {
const context = useContext(VoiceContext);
if (!context) {
throw new Error('useVoice must be used within VoiceProvider');
}
return context;
}

View File

@ -0,0 +1,279 @@
# Julia AI Voice Integration
## Architecture Overview
```
┌─────────────────────────────────────────────────────────────────┐
│ WellNuo Lite App (iOS) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Voice Call Screen (app/voice-call.tsx) │ │
│ │ - useLiveKitRoom hook │ │
│ │ - Audio session management │ │
│ │ - Microphone permission handling │ │
│ └───────────────────────┬─────────────────────────────────┘ │
│ │ WebSocket + WebRTC │
└──────────────────────────┼──────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ LiveKit Cloud │
│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
│ │ SFU Server │ │ Room Mgmt │ │ Agent Hosting │ │
│ │ (WebRTC) │ │ (Token Auth) │ │ (Python) │ │
│ └────────┬────────┘ └─────────────────┘ └────────┬────────┘ │
│ │ │ │
│ └──────────────────────────────────────────┘ │
│ │ Audio Streams │
└──────────────────────────┼──────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Julia AI Agent (Python) │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │
│ │ Deepgram │ │ Deepgram │ │ WellNuo voice_ask API │ │
│ │ STT │ │ TTS │ │ (Custom LLM backend) │ │
│ │ (Nova-2) │ │ (Aura) │ │ │ │
│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
## Components
### 1. React Native Client
**Location:** `app/voice-call.tsx`, `hooks/useLiveKitRoom.ts`
**Dependencies:**
- `@livekit/react-native` - LiveKit React Native SDK
- `@livekit/react-native-webrtc` - WebRTC for React Native
- `expo-av` - Audio session management
**Key Features:**
- Connects to LiveKit room with JWT token
- Manages audio session (activates speaker mode)
- Handles microphone permissions (see the sketch after this list)
- Displays connection state and transcription
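A minimal sketch of the permission and audio-session steps above, using `expo-av` plus the `AudioSession` helper from `@livekit/react-native`; the option values here are illustrative assumptions, while the app's real handling lives in `hooks/useLiveKitRoom.ts` and `utils/audioSession.ts`:

```typescript
import { Audio } from 'expo-av';
import { AudioSession } from '@livekit/react-native';

// Run before connecting to the room; returns false if the microphone was denied.
export async function prepareAudioForCall(): Promise<boolean> {
  const { granted } = await Audio.requestPermissionsAsync();
  if (!granted) return false;

  // Allow recording and keep playback audible when the iOS silent switch is on.
  await Audio.setAudioModeAsync({
    allowsRecordingIOS: true,
    playsInSilentModeIOS: true,
  });

  // Start the native audio session used by LiveKit's WebRTC audio.
  await AudioSession.startAudioSession();
  return true;
}
```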
### 2. LiveKit Cloud
**Project:** `live-kit-demo-70txlh6a`
**Agent ID:** `CA_Yd3qcuYEVKKE`
**Configuration:**
- Auto-scaling agent workers
- Managed STT/TTS through inference endpoints
- Built-in noise cancellation
**Getting Tokens:**
```typescript
// From WellNuo backend
const response = await fetch('/api/livekit/token', {
method: 'POST',
body: JSON.stringify({ roomName, userName })
});
const { token, url } = await response.json();
```
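In the app itself, the chat screen (`chat.tsx`) goes through `services/livekitService.ts` rather than calling `fetch` directly. A minimal sketch of that call path, with the shapes inferred from the diff above (fields beyond `token`, `wsUrl`, and `roomName` are assumptions):

```typescript
import { getToken, type BeneficiaryData } from '@/services/livekitService';

// Deployment id plus an id -> name map, as built in chat.tsx before starting a call.
async function fetchVoiceToken(userId: string): Promise<{ token: string; wsUrl: string }> {
  const beneficiaryData: BeneficiaryData = {
    deploymentId: '21',        // chat.tsx falls back to '21' when nothing is configured
    beneficiaryNamesDict: {},  // left empty in single-deployment mode
  };

  const res = await getToken(userId, beneficiaryData);
  if (!res.success || !res.data) {
    throw new Error(res.error || 'Failed to get voice token');
  }

  // roomName is only logged; token and wsUrl are handed to <LiveKitRoom />.
  const { token, wsUrl } = res.data;
  return { token, wsUrl };
}
```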
### 3. Julia AI Agent (Python)
**Location:** `julia-agent/julia-ai/src/agent.py`
**Stack:**
- LiveKit Agents SDK
- Deepgram Nova-2 (STT)
- Deepgram Aura Asteria (TTS - female voice)
- Silero VAD (Voice Activity Detection)
- Custom WellNuo LLM (voice_ask API)
## Setup & Deployment
### Prerequisites
1. **LiveKit Cloud Account**
- Sign up at https://cloud.livekit.io/
- Create a project
- Get API credentials
2. **LiveKit CLI**
```bash
# macOS
brew install livekit-cli
# Login
lk cloud auth
```
### Agent Deployment
1. **Navigate to agent directory:**
```bash
cd julia-agent/julia-ai
```
2. **Install dependencies:**
```bash
uv sync
```
3. **Configure environment:**
```bash
cp .env.example .env.local
# Add LIVEKIT_URL, LIVEKIT_API_KEY, LIVEKIT_API_SECRET
```
4. **Local development:**
```bash
uv run python src/agent.py dev
```
5. **Deploy to LiveKit Cloud:**
```bash
lk agent deploy
```
### React Native Setup
1. **Install packages:**
```bash
npm install @livekit/react-native @livekit/react-native-webrtc
```
2. **iOS permissions (Info.plist):**
```xml
<key>NSMicrophoneUsageDescription</key>
<string>WellNuo needs microphone access for voice calls with Julia AI</string>
```
3. **Pod install:**
```bash
cd ios && pod install
```
## Flow Diagram
```
User opens Voice tab
        ↓
Request microphone permission
        ├─ Denied → Show error
        ↓
Get LiveKit token from WellNuo API
        ↓
Connect to LiveKit room
        ↓
Agent joins automatically (LiveKit Cloud)
        ↓
Agent sends greeting (TTS)
        ↓
User speaks → STT → WellNuo API → Response → TTS
        ↓
User ends call → Disconnect from room
```
## API Integration
### WellNuo voice_ask API
The agent uses WellNuo's `voice_ask` API to get contextual responses about the beneficiary.
**Endpoint:** `https://eluxnetworks.net/function/well-api/api`
**Authentication:**
```python
data = {
"function": "credentials",
"clientId": "001",
"user_name": WELLNUO_USER,
"ps": WELLNUO_PASSWORD,
"nonce": str(random.randint(0, 999999)),
}
```
**Voice Ask:**
```python
data = {
"function": "voice_ask",
"clientId": "001",
"user_name": WELLNUO_USER,
"token": token,
"question": user_message,
"deployment_id": DEPLOYMENT_ID,
}
```
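For illustration, here is the same two-step sequence sketched in TypeScript (the agent performs it in Python). The payload encoding and the response field names are not documented above, so the JSON request body and the `token` field are assumptions:
```typescript
// Illustrative only: adjust the encoding and response fields to the real API.
const WELL_API = 'https://eluxnetworks.net/function/well-api/api';

async function callWellApi(payload: Record<string, string>) {
  const res = await fetch(WELL_API, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' }, // assumption: JSON body
    body: JSON.stringify(payload),
  });
  return res.json();
}

async function voiceAsk(question: string, user: string, ps: string, deploymentId: string) {
  // Step 1: exchange credentials for a session token
  const creds = await callWellApi({
    function: 'credentials',
    clientId: '001',
    user_name: user,
    ps,
    nonce: String(Math.floor(Math.random() * 1000000)),
  });

  // Step 2: ask the question using that token
  return callWellApi({
    function: 'voice_ask',
    clientId: '001',
    user_name: user,
    token: creds.token, // assumption: credentials response exposes `token`
    question,
    deployment_id: deploymentId,
  });
}
```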
## Troubleshooting
### Common Issues
1. **No audio playback on iOS**
- Check audio session configuration
- Ensure `expo-av` is properly configured
- Test on real device (simulator has audio limitations)
2. **Microphone not working**
- Verify permissions in Info.plist
- Check if user granted permission
- Real device required for full audio testing
3. **Agent not responding**
- Check agent logs: `lk agent logs`
- Verify LIVEKIT credentials
- Check WellNuo API connectivity
4. **Connection fails**
- Verify token is valid
- Check network connectivity
- Ensure LiveKit URL is correct
### Debugging
```bash
# View agent logs
lk agent logs
# View specific deployment logs
lk agent logs --version v20260119031418
# Check agent status
lk agent list
```
## Environment Variables
### Agent (.env.local)
```
LIVEKIT_URL=wss://live-kit-demo-70txlh6a.livekit.cloud
LIVEKIT_API_KEY=your-api-key
LIVEKIT_API_SECRET=your-api-secret
WELLNUO_USER=anandk
WELLNUO_PASSWORD=anandk_8
DEPLOYMENT_ID=21
```
### React Native (via WellNuo backend)
Token generation handled server-side for security.
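The backend implementation is not part of this repo; below is a minimal sketch of the server-side step with `livekit-server-sdk` (the package choice, route shape, and grant options are assumptions, not the actual WellNuo code):
```typescript
// Hypothetical server-side helper; assumes livekit-server-sdk plus the
// LIVEKIT_API_KEY / LIVEKIT_API_SECRET / LIVEKIT_URL variables listed above.
import { AccessToken } from 'livekit-server-sdk';

export async function createVoiceToken(userId: string) {
  const roomName = `wellnuo-${userId}-${Date.now()}`; // matches the room naming used elsewhere in this doc set
  const at = new AccessToken(process.env.LIVEKIT_API_KEY!, process.env.LIVEKIT_API_SECRET!, {
    identity: userId,
  });
  at.addGrant({ roomJoin: true, room: roomName, canPublish: true, canSubscribe: true });
  return {
    token: await at.toJwt(), // toJwt() is async in recent SDK versions
    roomName,
    wsUrl: process.env.LIVEKIT_URL!,
  };
}
```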
## Status
**Current State:** WIP - Not tested on real device
**Working:**
- Agent deploys to LiveKit Cloud
- Agent connects to rooms
- STT/TTS pipeline configured
- WellNuo API integration
- React Native UI
**Needs Testing:**
- Real device microphone capture
- Audio playback on physical iOS device
- Full conversation loop end-to-end
- Token refresh/expiration handling

707
hooks/useLiveKitRoom.ts Normal file
View File

@ -0,0 +1,707 @@
/**
* useLiveKitRoom - Hook for LiveKit voice call with Julia AI
*
* IMPORTANT: This hook encapsulates ALL LiveKit logic.
* The UI component should only use the returned state and actions.
*
* LOGGING: Maximum transparency - every step is logged!
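 *
 * Usage (sketch):
 * ```typescript
 * const { state, callDuration, isMuted, connect, disconnect, toggleMute } =
 *   useLiveKitRoom({ userId: 'user-123', onTranscript: (role, text) => console.log(role, text) });
 * // connect() when the call screen opens, disconnect() on hang up
 * ```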
*/
import { useState, useCallback, useRef, useEffect } from 'react';
import { Platform, AppState, AppStateStatus, NativeModules } from 'react-native';
import type { Room as RoomType } from 'livekit-client';
// Helper to detect iOS Simulator
// Expo Go and production builds both work with this approach
const isIOSSimulator = (): boolean => {
if (Platform.OS !== 'ios') return false;
// Check via DeviceInfo module if available
const { PlatformConstants } = NativeModules;
return PlatformConstants?.interfaceIdiom === 'simulator' ||
PlatformConstants?.isSimulator === true;
};
import { getToken, VOICE_NAME, BeneficiaryData } from '@/services/livekitService';
import {
configureAudioForVoiceCall,
stopAudioSession,
reconfigureAudioForPlayback,
} from '@/utils/audioSession';
import { callManager } from '@/services/callManager';
// Connection states
export type ConnectionState =
| 'idle'
| 'initializing'
| 'configuring_audio'
| 'requesting_token'
| 'connecting'
| 'connected'
| 'reconnecting'
| 'disconnecting'
| 'disconnected'
| 'error';
// Log entry type
export interface LogEntry {
timestamp: string; // Formatted time string (HH:MM:SS.mmm)
level: 'info' | 'warn' | 'error' | 'success';
message: string;
}
// Hook options
export interface UseLiveKitRoomOptions {
userId: string;
beneficiaryData?: BeneficiaryData;
onTranscript?: (role: 'user' | 'assistant', text: string) => void;
autoConnect?: boolean;
}
// Hook return type
export interface UseLiveKitRoomReturn {
// Connection state
state: ConnectionState;
error: string | null;
// Call info
roomName: string | null;
callDuration: number;
// Audio state
isMuted: boolean;
isAgentSpeaking: boolean;
canPlayAudio: boolean;
// Debug info
logs: LogEntry[];
participantCount: number;
// Actions
connect: () => Promise<void>;
disconnect: () => Promise<void>;
toggleMute: () => Promise<void>;
clearLogs: () => void;
}
/**
* Main hook for LiveKit voice calls
*/
export function useLiveKitRoom(options: UseLiveKitRoomOptions): UseLiveKitRoomReturn {
const { userId, beneficiaryData, onTranscript, autoConnect = false } = options;
// State
const [state, setState] = useState<ConnectionState>('idle');
const [error, setError] = useState<string | null>(null);
const [roomName, setRoomName] = useState<string | null>(null);
const [callDuration, setCallDuration] = useState(0);
const [isMuted, setIsMuted] = useState(false);
const [isAgentSpeaking, setIsAgentSpeaking] = useState(false);
const [canPlayAudio, setCanPlayAudio] = useState(false);
const [logs, setLogs] = useState<LogEntry[]>([]);
const [participantCount, setParticipantCount] = useState(0);
// Refs
const roomRef = useRef<RoomType | null>(null);
const callStartTimeRef = useRef<number | null>(null);
const connectionIdRef = useRef(0);
const isUnmountingRef = useRef(false);
const appStateRef = useRef<AppStateStatus>(AppState.currentState);
const callIdRef = useRef<string | null>(null);
// ===================
// LOGGING FUNCTIONS
// ===================
const log = useCallback((level: LogEntry['level'], message: string) => {
const now = new Date();
const timestamp = now.toLocaleTimeString('en-US', {
hour12: false,
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
}) + '.' + now.getMilliseconds().toString().padStart(3, '0');
const entry: LogEntry = {
timestamp,
level,
message,
};
setLogs((prev) => [...prev, entry]);
// Also log to console with color
const prefix = `[LiveKit ${timestamp}]`;
switch (level) {
case 'error':
console.error(`${prefix} ERROR: ${message}`);
break;
case 'warn':
console.warn(`${prefix} WARN: ${message}`);
break;
case 'success':
console.log(`${prefix} SUCCESS: ${message}`);
break;
default:
console.log(`${prefix} INFO: ${message}`);
}
}, []);
const logInfo = useCallback((msg: string) => log('info', msg), [log]);
const logWarn = useCallback((msg: string) => log('warn', msg), [log]);
const logError = useCallback((msg: string) => log('error', msg), [log]);
const logSuccess = useCallback((msg: string) => log('success', msg), [log]);
const clearLogs = useCallback(() => {
setLogs([]);
}, []);
// ===================
// CONNECT FUNCTION
// ===================
const connect = useCallback(async () => {
// Prevent multiple concurrent connection attempts
const currentConnectionId = ++connectionIdRef.current;
// Generate unique call ID for this session
const callId = `call-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
callIdRef.current = callId;
logInfo('========== STARTING VOICE CALL ==========');
logInfo(`User ID: ${userId}`);
logInfo(`Platform: ${Platform.OS}`);
logInfo(`Connection ID: ${currentConnectionId}`);
logInfo(`Call ID: ${callId}`);
// Register with CallManager - this will disconnect any existing call
logInfo('Registering call with CallManager...');
await callManager.registerCall(callId, async () => {
logInfo('CallManager requested disconnect (another call starting)');
if (roomRef.current) {
await roomRef.current.disconnect();
roomRef.current = null;
}
await stopAudioSession();
});
logSuccess('Call registered with CallManager');
// Check if already connected
if (roomRef.current) {
logWarn('Already connected to a room, disconnecting first...');
await roomRef.current.disconnect();
roomRef.current = null;
}
try {
// ========== STEP 1: Initialize ==========
setState('initializing');
logInfo('STEP 1/6: Initializing...');
// Detect simulator vs real device
const isSimulator = isIOSSimulator();
logInfo(`Device type: ${isSimulator ? 'SIMULATOR' : 'REAL DEVICE'}`);
logInfo(`Device model: ${Platform.OS} ${Platform.Version}`);
if (isSimulator) {
logWarn('⚠️ SIMULATOR DETECTED - Microphone will NOT work!');
logWarn('Simulator can only test: connection, token, agent presence, TTS playback');
logWarn('For full STT test, use a real iPhone device');
}
// Check if connection was cancelled
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled (component unmounting or new connection started)');
return;
}
// ========== STEP 2: Register WebRTC Globals ==========
logInfo('STEP 2/6: Registering WebRTC globals...');
const { registerGlobals } = await import('@livekit/react-native');
if (typeof global.RTCPeerConnection === 'undefined') {
logInfo('RTCPeerConnection not found, calling registerGlobals()...');
registerGlobals();
logSuccess('WebRTC globals registered!');
} else {
logInfo('WebRTC globals already registered');
}
// Check again
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled after registerGlobals');
return;
}
// ========== STEP 3: Configure iOS Audio ==========
setState('configuring_audio');
logInfo('STEP 3/6: Configuring iOS AudioSession...');
await configureAudioForVoiceCall();
logSuccess('iOS AudioSession configured!');
// Check again
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled after audio config');
await stopAudioSession();
return;
}
// ========== STEP 4: Get Token ==========
setState('requesting_token');
logInfo('STEP 4/6: Requesting token from server...');
const tokenResult = await getToken(userId, beneficiaryData);
if (!tokenResult.success || !tokenResult.data) {
const errorMsg = tokenResult.error || 'Failed to get token';
logError(`Token request failed: ${errorMsg}`);
setError(errorMsg);
setState('error');
return;
}
const { token, wsUrl, roomName: room } = tokenResult.data;
setRoomName(room);
logSuccess(`Token received!`);
logInfo(` Room: ${room}`);
logInfo(` WebSocket URL: ${wsUrl}`);
logInfo(` Token length: ${token.length} chars`);
// Check again
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled after token');
await stopAudioSession();
return;
}
// ========== STEP 5: Import LiveKit and Create Room ==========
logInfo('STEP 5/6: Creating LiveKit Room...');
const { Room, RoomEvent, ConnectionState: LKConnectionState, Track } = await import(
'livekit-client'
);
logInfo(` Room class available: ${typeof Room === 'function'}`);
logInfo(` RoomEvent available: ${typeof RoomEvent === 'object'}`);
const lkRoom = new Room();
roomRef.current = lkRoom;
logSuccess('Room instance created!');
// ========== Setup Event Listeners ==========
logInfo('Setting up event listeners...');
// Connection state changes
lkRoom.on(RoomEvent.ConnectionStateChanged, (newState) => {
logInfo(`EVENT: ConnectionStateChanged -> ${newState}`);
switch (newState) {
case LKConnectionState.Connecting:
setState('connecting');
break;
case LKConnectionState.Connected:
setState('connected');
logSuccess('Connected to room!');
if (!callStartTimeRef.current) {
callStartTimeRef.current = Date.now();
logInfo('Call timer started');
}
break;
case LKConnectionState.Reconnecting:
setState('reconnecting');
logWarn('Reconnecting...');
break;
case LKConnectionState.Disconnected:
setState('disconnected');
logInfo('Disconnected from room');
break;
}
});
// Track subscribed (audio from agent)
lkRoom.on(RoomEvent.TrackSubscribed, async (track, publication, participant) => {
logInfo(`EVENT: TrackSubscribed`);
logInfo(` Track kind: ${track.kind}`);
logInfo(` Track source: ${track.source}`);
logInfo(` Participant: ${participant.identity}`);
logInfo(` Publication SID: ${publication.trackSid}`);
if (track.kind === Track.Kind.Audio) {
logSuccess(`Audio track from ${participant.identity} - should hear voice now!`);
setIsAgentSpeaking(true);
// Reconfigure audio for playback
logInfo('Reconfiguring audio for playback...');
await reconfigureAudioForPlayback();
}
});
// Track unsubscribed
lkRoom.on(RoomEvent.TrackUnsubscribed, (track, publication, participant) => {
logInfo(`EVENT: TrackUnsubscribed`);
logInfo(` Track kind: ${track.kind}`);
logInfo(` Participant: ${participant.identity}`);
if (track.kind === Track.Kind.Audio) {
setIsAgentSpeaking(false);
}
});
// Track muted/unmuted
lkRoom.on(RoomEvent.TrackMuted, (publication, participant) => {
logInfo(`EVENT: TrackMuted - ${publication.trackSid} by ${participant.identity}`);
});
lkRoom.on(RoomEvent.TrackUnmuted, (publication, participant) => {
logInfo(`EVENT: TrackUnmuted - ${publication.trackSid} by ${participant.identity}`);
});
// Participants
lkRoom.on(RoomEvent.ParticipantConnected, (participant) => {
logSuccess(`EVENT: ParticipantConnected - ${participant.identity}`);
setParticipantCount((c) => c + 1);
});
lkRoom.on(RoomEvent.ParticipantDisconnected, (participant) => {
logInfo(`EVENT: ParticipantDisconnected - ${participant.identity}`);
setParticipantCount((c) => Math.max(0, c - 1));
});
// Active speakers (voice activity)
lkRoom.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
if (speakers.length > 0) {
const speakerNames = speakers.map((s: any) => s.identity).join(', ');
logInfo(`EVENT: ActiveSpeakersChanged - ${speakerNames}`);
// Check if agent is speaking
const agentSpeaking = speakers.some((s: any) => s.identity.startsWith('agent'));
setIsAgentSpeaking(agentSpeaking);
}
});
// Local track published (our mic)
lkRoom.on(RoomEvent.LocalTrackPublished, (publication, participant) => {
logSuccess(`EVENT: LocalTrackPublished`);
logInfo(` Track: ${publication.trackSid}`);
logInfo(` Kind: ${publication.kind}`);
logInfo(` Source: ${publication.source}`);
});
// Audio playback status
lkRoom.on(RoomEvent.AudioPlaybackStatusChanged, () => {
const canPlay = lkRoom.canPlaybackAudio;
logInfo(`EVENT: AudioPlaybackStatusChanged - canPlaybackAudio: ${canPlay}`);
setCanPlayAudio(canPlay);
});
// Data received (transcripts)
lkRoom.on(RoomEvent.DataReceived, (payload, participant) => {
try {
const data = JSON.parse(new TextDecoder().decode(payload));
logInfo(`EVENT: DataReceived from ${participant?.identity || 'unknown'}`);
logInfo(` Type: ${data.type}`);
if (data.type === 'transcript' && onTranscript) {
logInfo(` Role: ${data.role}, Text: ${data.text?.substring(0, 50)}...`);
onTranscript(data.role, data.text);
}
} catch (e) {
// Non-JSON data, ignore
}
});
// Errors
lkRoom.on(RoomEvent.Disconnected, (reason) => {
logWarn(`EVENT: Disconnected - Reason: ${reason}`);
});
// Check again before connect
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled before room.connect()');
await stopAudioSession();
return;
}
// ========== STEP 6: Connect to Room ==========
setState('connecting');
logInfo('STEP 6/6: Connecting to LiveKit room...');
logInfo(` URL: ${wsUrl}`);
logInfo(` Room: ${room}`);
await lkRoom.connect(wsUrl, token, {
autoSubscribe: true,
});
logSuccess('Connected to room!');
// ========== CRITICAL: Start Audio Playback ==========
// This is REQUIRED for audio to play on iOS and Android!
// Without this call, remote audio tracks will NOT be heard.
logInfo('Starting audio playback (room.startAudio)...');
try {
await lkRoom.startAudio();
logSuccess(`Audio playback started! canPlaybackAudio: ${lkRoom.canPlaybackAudio}`);
setCanPlayAudio(lkRoom.canPlaybackAudio);
} catch (audioPlaybackErr: any) {
logError(`startAudio failed: ${audioPlaybackErr.message}`);
// Don't fail the whole call - audio might still work on some platforms
}
// Check if connection was cancelled after connect
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Connection cancelled after room.connect()');
await lkRoom.disconnect();
await stopAudioSession();
return;
}
// ========== Enable Microphone ==========
logInfo('Enabling microphone...');
try {
await lkRoom.localParticipant.setMicrophoneEnabled(true);
logSuccess('Microphone enabled!');
logInfo(` Local participant: ${lkRoom.localParticipant.identity}`);
// Log track info - CRITICAL for debugging!
const audioTracks = lkRoom.localParticipant.getTrackPublications();
logInfo(` Published tracks: ${audioTracks.length}`);
let micTrackFound = false;
audioTracks.forEach((pub) => {
logInfo(` - ${pub.kind}: ${pub.trackSid} (${pub.source})`);
logInfo(` isMuted: ${pub.isMuted}, isSubscribed: ${pub.isSubscribed}`);
if (pub.kind === 'audio' && pub.source === 'microphone') {
micTrackFound = true;
const track = pub.track;
if (track) {
logInfo(` Track mediaStreamTrack: ${track.mediaStreamTrack?.readyState || 'N/A'}`);
logInfo(` Track enabled: ${track.mediaStreamTrack?.enabled ?? 'N/A'}`);
} else {
logWarn(` WARNING: No track object on publication!`);
}
}
});
if (!micTrackFound) {
// Check if simulator
const isSimulator = isIOSSimulator();
if (isSimulator) {
logWarn('No microphone track - EXPECTED on simulator');
logInfo('Simulator test: check if Agent joined and TTS works');
} else {
logError('CRITICAL: No microphone track published! STT will NOT work!');
logError('Possible causes: permissions denied, AudioSession not configured, hardware issue');
}
} else {
logSuccess('Microphone track found and published - STT should work');
}
} catch (micError: any) {
logError(`Failed to enable microphone: ${micError.message}`);
logError(`Stack: ${micError.stack || 'N/A'}`);
// This is CRITICAL - user must know!
setError(`Microphone error: ${micError.message}`);
}
// Set initial participant count
setParticipantCount(lkRoom.remoteParticipants.size);
logInfo(`Remote participants: ${lkRoom.remoteParticipants.size}`);
logSuccess('========== VOICE CALL STARTED ==========');
} catch (err: any) {
// Ignore errors if unmounting
if (isUnmountingRef.current || currentConnectionId !== connectionIdRef.current) {
logWarn('Error ignored (component unmounting)');
return;
}
const errorMsg = err?.message || String(err);
logError(`Connection failed: ${errorMsg}`);
logError(`Stack: ${err?.stack || 'N/A'}`);
setError(errorMsg);
setState('error');
// Cleanup
await stopAudioSession();
}
}, [userId, beneficiaryData, onTranscript, logInfo, logWarn, logError, logSuccess]);
// ===================
// DISCONNECT FUNCTION
// ===================
const disconnect = useCallback(async () => {
logInfo('========== DISCONNECTING ==========');
setState('disconnecting');
// Unregister from CallManager
if (callIdRef.current) {
logInfo(`Unregistering call: ${callIdRef.current}`);
callManager.unregisterCall(callIdRef.current);
callIdRef.current = null;
}
try {
if (roomRef.current) {
logInfo('Disconnecting from room...');
await roomRef.current.disconnect();
roomRef.current = null;
logSuccess('Disconnected from room');
} else {
logInfo('No room to disconnect from');
}
} catch (err: any) {
logError(`Disconnect error: ${err.message}`);
}
logInfo('Stopping audio session...');
await stopAudioSession();
// Reset state
setState('disconnected');
setRoomName(null);
setIsMuted(false);
setIsAgentSpeaking(false);
setParticipantCount(0);
callStartTimeRef.current = null;
logSuccess('========== DISCONNECTED ==========');
}, [logInfo, logError, logSuccess]);
// ===================
// TOGGLE MUTE
// ===================
const toggleMute = useCallback(async () => {
if (!roomRef.current) {
logWarn('Cannot toggle mute - not connected');
return;
}
const newMuted = !isMuted;
logInfo(`Toggling mute: ${isMuted} -> ${newMuted}`);
try {
await roomRef.current.localParticipant.setMicrophoneEnabled(!newMuted);
setIsMuted(newMuted);
logSuccess(`Microphone ${newMuted ? 'muted' : 'unmuted'}`);
} catch (err: any) {
logError(`Failed to toggle mute: ${err.message}`);
}
}, [isMuted, logInfo, logWarn, logError, logSuccess]);
// ===================
// CALL DURATION TIMER
// ===================
useEffect(() => {
if (state !== 'connected') return;
const interval = setInterval(() => {
if (callStartTimeRef.current) {
const elapsed = Math.floor((Date.now() - callStartTimeRef.current) / 1000);
setCallDuration(elapsed);
}
}, 1000);
return () => clearInterval(interval);
}, [state]);
// ===================
// APP STATE HANDLING
// ===================
useEffect(() => {
const handleAppStateChange = (nextAppState: AppStateStatus) => {
const prevState = appStateRef.current;
appStateRef.current = nextAppState;
if (prevState.match(/inactive|background/) && nextAppState === 'active') {
logInfo('App returned to foreground');
} else if (prevState === 'active' && nextAppState.match(/inactive|background/)) {
logInfo('App went to background - call continues in background');
}
};
const subscription = AppState.addEventListener('change', handleAppStateChange);
return () => subscription.remove();
}, [logInfo]);
// ===================
// CLEANUP ON UNMOUNT
// ===================
useEffect(() => {
isUnmountingRef.current = false;
return () => {
isUnmountingRef.current = true;
// Cleanup
const cleanup = async () => {
// Unregister from CallManager
if (callIdRef.current) {
callManager.unregisterCall(callIdRef.current);
callIdRef.current = null;
}
if (roomRef.current) {
try {
await roomRef.current.disconnect();
} catch (e) {
// Ignore
}
roomRef.current = null;
}
await stopAudioSession();
};
cleanup();
};
}, []);
// ===================
// AUTO CONNECT
// ===================
useEffect(() => {
if (autoConnect && state === 'idle') {
connect();
}
}, [autoConnect, state, connect]);
// ===================
// RETURN
// ===================
return {
// Connection state
state,
error,
// Call info
roomName,
callDuration,
// Audio state
isMuted,
isAgentSpeaking,
canPlayAudio,
// Debug
logs,
participantCount,
// Actions
connect,
disconnect,
toggleMute,
clearLogs,
};
}
export { VOICE_NAME };

View File

@ -1,295 +0,0 @@
/**
* Speech Recognition Hook
*
* Wraps @jamsch/expo-speech-recognition for easy use in components.
* Provides start/stop controls, recognized text, and status states.
*
* Usage:
* ```typescript
* const { startListening, stopListening, isListening, recognizedText, error } = useSpeechRecognition();
*
* // Start listening (will request permissions if needed)
* await startListening();
*
* // Stop and get final result
* stopListening();
*
* // recognizedText contains the transcript
* ```
*/
import { useState, useCallback, useRef, useEffect } from 'react';
import {
ExpoSpeechRecognitionModule,
useSpeechRecognitionEvent,
} from '@jamsch/expo-speech-recognition';
import { Platform } from 'react-native';
export interface UseSpeechRecognitionOptions {
/** Language for recognition (default: 'en-US') */
lang?: string;
/** Whether to return interim results while speaking (default: true) */
interimResults?: boolean;
/** Whether to continue listening after pause (default: false) */
continuous?: boolean;
/** Whether to add punctuation (iOS only, default: true) */
addsPunctuation?: boolean;
/** Callback when speech recognition result is available */
onResult?: (transcript: string, isFinal: boolean) => void;
/** Callback when an error occurs */
onError?: (error: string) => void;
/** Callback when speech recognition starts */
onStart?: () => void;
/** Callback when speech recognition ends */
onEnd?: () => void;
/** Callback when voice activity is detected (first interim result) - useful for interrupting TTS */
onVoiceDetected?: () => void;
}
export interface UseSpeechRecognitionReturn {
/** Start listening for speech */
startListening: () => Promise<boolean>;
/** Stop listening and finalize result */
stopListening: () => void;
/** Abort listening without processing */
abortListening: () => void;
/** Whether currently listening */
isListening: boolean;
/** Whether speech recognition is available on this device */
isAvailable: boolean;
/** Current recognized text (updates in real-time if interimResults=true) */
recognizedText: string;
/** Partial transcript (interim result, not final) */
partialTranscript: string;
/** Error message if any */
error: string | null;
/** Clear the recognized text and error */
reset: () => void;
}
export function useSpeechRecognition(
options: UseSpeechRecognitionOptions = {}
): UseSpeechRecognitionReturn {
const {
lang = 'en-US',
interimResults = true,
continuous = false,
addsPunctuation = true,
onResult,
onError,
onStart,
onEnd,
onVoiceDetected,
} = options;
const [isListening, setIsListening] = useState(false);
const [isAvailable, setIsAvailable] = useState(true);
const [recognizedText, setRecognizedText] = useState('');
const [partialTranscript, setPartialTranscript] = useState('');
const [error, setError] = useState<string | null>(null);
// Track if we're in the middle of starting to prevent double-starts
const isStartingRef = useRef(false);
// Track if voice has been detected in current session (for onVoiceDetected callback)
const voiceDetectedRef = useRef(false);
// Check availability on mount
useEffect(() => {
const checkAvailability = async () => {
try {
// Check if we can get permissions (indirect availability check)
const status = await ExpoSpeechRecognitionModule.getPermissionsAsync();
// If we can query permissions, the module is available
setIsAvailable(true);
console.log('[SpeechRecognition] Available, permission status:', status.status);
} catch (err) {
console.error('[SpeechRecognition] Not available:', err);
setIsAvailable(false);
}
};
checkAvailability();
}, []);
// Event: Recognition started
useSpeechRecognitionEvent('start', () => {
console.log('[SpeechRecognition] Started');
setIsListening(true);
setError(null);
isStartingRef.current = false;
voiceDetectedRef.current = false; // Reset voice detection flag for new session
onStart?.();
});
// Event: Recognition ended
useSpeechRecognitionEvent('end', () => {
console.log('[SpeechRecognition] Ended');
setIsListening(false);
setPartialTranscript('');
isStartingRef.current = false;
voiceDetectedRef.current = false; // Reset for next session
onEnd?.();
});
// Event: Result available
useSpeechRecognitionEvent('result', (event) => {
const results = event.results;
if (results && results.length > 0) {
const result = results[results.length - 1];
const transcript = result?.transcript || '';
const isFinal = event.isFinal ?? false;
console.log('[SpeechRecognition] Result:', transcript.slice(0, 50), 'final:', isFinal);
// Trigger onVoiceDetected on first result (voice activity detected)
if (!voiceDetectedRef.current && transcript.length > 0) {
voiceDetectedRef.current = true;
console.log('[SpeechRecognition] Voice activity detected');
onVoiceDetected?.();
}
if (isFinal) {
setRecognizedText(transcript);
setPartialTranscript('');
} else {
setPartialTranscript(transcript);
}
onResult?.(transcript, isFinal);
}
});
// Event: Error occurred
useSpeechRecognitionEvent('error', (event) => {
const errorMessage = event.message || event.error || 'Speech recognition error';
console.error('[SpeechRecognition] Error:', errorMessage);
// Don't set error for "no-speech" - this is normal when user doesn't say anything
if (event.error !== 'no-speech') {
setError(errorMessage);
onError?.(errorMessage);
}
setIsListening(false);
isStartingRef.current = false;
});
/**
* Start listening for speech
* @returns true if started successfully, false otherwise
*/
const startListening = useCallback(async (): Promise<boolean> => {
if (isListening || isStartingRef.current) {
console.log('[SpeechRecognition] Already listening or starting');
return false;
}
if (!isAvailable) {
const msg = 'Speech recognition is not available on this device';
console.error('[SpeechRecognition]', msg);
setError(msg);
onError?.(msg);
return false;
}
isStartingRef.current = true;
setError(null);
setRecognizedText('');
setPartialTranscript('');
try {
// Request permissions
const permissionResult = await ExpoSpeechRecognitionModule.requestPermissionsAsync();
if (!permissionResult.granted) {
const msg = 'Microphone permission denied';
console.error('[SpeechRecognition]', msg);
setError(msg);
onError?.(msg);
isStartingRef.current = false;
return false;
}
console.log('[SpeechRecognition] Starting with lang:', lang);
// Start recognition
ExpoSpeechRecognitionModule.start({
lang,
interimResults,
continuous,
addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
// Android-specific: longer silence timeout for more natural pauses
androidIntentOptions: Platform.OS === 'android' ? {
EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
} : undefined,
});
return true;
} catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to start speech recognition';
console.error('[SpeechRecognition] Start error:', msg);
setError(msg);
onError?.(msg);
isStartingRef.current = false;
return false;
}
}, [isListening, isAvailable, lang, interimResults, continuous, addsPunctuation, onError]);
/**
* Stop listening and process final result
*/
const stopListening = useCallback(() => {
if (!isListening && !isStartingRef.current) {
console.log('[SpeechRecognition] Not listening, nothing to stop');
return;
}
console.log('[SpeechRecognition] Stopping...');
try {
ExpoSpeechRecognitionModule.stop();
} catch (err) {
console.warn('[SpeechRecognition] Stop error:', err);
}
}, [isListening]);
/**
* Abort listening without processing
*/
const abortListening = useCallback(() => {
if (!isListening && !isStartingRef.current) {
return;
}
console.log('[SpeechRecognition] Aborting...');
try {
ExpoSpeechRecognitionModule.abort();
} catch (err) {
console.warn('[SpeechRecognition] Abort error:', err);
}
setIsListening(false);
setPartialTranscript('');
isStartingRef.current = false;
}, [isListening]);
/**
* Reset state
*/
const reset = useCallback(() => {
setRecognizedText('');
setPartialTranscript('');
setError(null);
}, []);
return {
startListening,
stopListening,
abortListening,
isListening,
isAvailable,
recognizedText,
partialTranscript,
error,
reset,
};
}

View File

@ -1,252 +0,0 @@
/**
* Text-to-Speech Hook
*
* Wraps expo-speech for easy use in components.
* Provides speak/stop controls, status states, and queue management.
*
* Usage:
* ```typescript
* const { speak, stop, isSpeaking, error } = useTextToSpeech();
*
* // Speak text
* await speak('Hello world');
*
* // Stop speaking
* stop();
*
* // Check if speaking
* if (isSpeaking) { ... }
* ```
*/
import { useState, useCallback, useRef, useEffect } from 'react';
import * as Speech from 'expo-speech';
export interface UseTextToSpeechOptions {
/** Language for speech (default: 'en-US') */
language?: string;
/** Speech rate, 0.5-2.0 (default: 0.9) */
rate?: number;
/** Speech pitch, 0.5-2.0 (default: 1.0) */
pitch?: number;
/** Voice identifier (optional, uses system default) */
voice?: string;
/** Callback when speech starts */
onStart?: () => void;
/** Callback when speech ends */
onDone?: () => void;
/** Callback when speech is stopped */
onStopped?: () => void;
/** Callback when an error occurs */
onError?: (error: string) => void;
}
export interface UseTextToSpeechReturn {
/** Speak text using TTS */
speak: (text: string, options?: Partial<UseTextToSpeechOptions>) => Promise<void>;
/** Stop speaking */
stop: () => void;
/** Whether currently speaking */
isSpeaking: boolean;
/** Whether TTS is available on this device */
isAvailable: boolean;
/** Current text being spoken */
currentText: string | null;
/** Error message if any */
error: string | null;
/** Get available voices */
getVoices: () => Promise<Speech.Voice[]>;
/** Clear error state */
clearError: () => void;
}
export function useTextToSpeech(
options: UseTextToSpeechOptions = {}
): UseTextToSpeechReturn {
const {
language = 'en-US',
rate = 0.9,
pitch = 1.0,
voice,
onStart,
onDone,
onStopped,
onError,
} = options;
const [isSpeaking, setIsSpeaking] = useState(false);
const [isAvailable, setIsAvailable] = useState(true);
const [currentText, setCurrentText] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
// Track if component is mounted to prevent state updates after unmount
const isMountedRef = useRef(true);
// Track current speech promise resolve
const resolveRef = useRef<(() => void) | null>(null);
// Check if currently speaking on mount and cleanup
useEffect(() => {
isMountedRef.current = true;
const checkSpeaking = async () => {
try {
const speaking = await Speech.isSpeakingAsync();
if (isMountedRef.current) {
setIsSpeaking(speaking);
}
} catch (err) {
console.warn('[TTS] Could not check speaking status:', err);
}
};
checkSpeaking();
return () => {
isMountedRef.current = false;
// Stop any ongoing speech when unmounting
Speech.stop();
};
}, []);
/**
* Speak text using TTS
* @param text - Text to speak
* @param overrideOptions - Override default options for this call
* @returns Promise that resolves when speech completes
*/
const speak = useCallback(
async (
text: string,
overrideOptions?: Partial<UseTextToSpeechOptions>
): Promise<void> => {
const trimmedText = text.trim();
if (!trimmedText) {
console.log('[TTS] Empty text, skipping');
return;
}
// Merge options
const opts = {
language: overrideOptions?.language ?? language,
rate: overrideOptions?.rate ?? rate,
pitch: overrideOptions?.pitch ?? pitch,
voice: overrideOptions?.voice ?? voice,
onStart: overrideOptions?.onStart ?? onStart,
onDone: overrideOptions?.onDone ?? onDone,
onStopped: overrideOptions?.onStopped ?? onStopped,
onError: overrideOptions?.onError ?? onError,
};
// Stop any current speech before starting new
if (isSpeaking) {
Speech.stop();
// Wait a bit for cleanup
await new Promise((r) => setTimeout(r, 50));
}
console.log('[TTS] Speaking:', trimmedText.slice(0, 50) + (trimmedText.length > 50 ? '...' : ''));
if (isMountedRef.current) {
setCurrentText(trimmedText);
setIsSpeaking(true);
setError(null);
}
return new Promise<void>((resolve) => {
resolveRef.current = resolve;
Speech.speak(trimmedText, {
language: opts.language,
rate: opts.rate,
pitch: opts.pitch,
voice: opts.voice,
onStart: () => {
console.log('[TTS] Started');
opts.onStart?.();
},
onDone: () => {
console.log('[TTS] Completed');
if (isMountedRef.current) {
setIsSpeaking(false);
setCurrentText(null);
}
opts.onDone?.();
resolveRef.current = null;
resolve();
},
onStopped: () => {
console.log('[TTS] Stopped');
if (isMountedRef.current) {
setIsSpeaking(false);
setCurrentText(null);
}
opts.onStopped?.();
resolveRef.current = null;
resolve();
},
onError: (err) => {
const errorMsg = typeof err === 'string' ? err : 'Speech synthesis error';
console.error('[TTS] Error:', errorMsg);
if (isMountedRef.current) {
setIsSpeaking(false);
setCurrentText(null);
setError(errorMsg);
}
opts.onError?.(errorMsg);
resolveRef.current = null;
resolve();
},
});
});
},
[language, rate, pitch, voice, isSpeaking, onStart, onDone, onStopped, onError]
);
/**
* Stop speaking
*/
const stop = useCallback(() => {
console.log('[TTS] Stop requested');
Speech.stop();
if (isMountedRef.current) {
setIsSpeaking(false);
setCurrentText(null);
}
// Resolve pending promise
if (resolveRef.current) {
resolveRef.current();
resolveRef.current = null;
}
}, []);
/**
* Get available voices for speech synthesis
*/
const getVoices = useCallback(async (): Promise<Speech.Voice[]> => {
try {
const voices = await Speech.getAvailableVoicesAsync();
console.log('[TTS] Available voices:', voices.length);
return voices;
} catch (err) {
console.error('[TTS] Could not get voices:', err);
return [];
}
}, []);
/**
* Clear error state
*/
const clearError = useCallback(() => {
setError(null);
}, []);
return {
speak,
stop,
isSpeaking,
isAvailable,
currentText,
error,
getVoices,
clearError,
};
}

48
package-lock.json generated
View File

@ -8,9 +8,11 @@
"name": "wellnuo",
"version": "1.0.0",
"dependencies": {
"@config-plugins/react-native-webrtc": "^13.0.0",
"@dr.pogodin/react-native-fs": "^2.36.2",
"@expo/vector-icons": "^15.0.3",
"@jamsch/expo-speech-recognition": "^0.2.15",
"@livekit/react-native": "^2.9.6",
"@livekit/react-native-expo-plugin": "^1.0.1",
"@notifee/react-native": "^9.1.8",
"@react-navigation/bottom-tabs": "^7.4.0",
"@react-navigation/elements": "^2.6.3",
@ -27,12 +29,12 @@
"expo-linking": "~8.0.10",
"expo-router": "~6.0.19",
"expo-secure-store": "^15.0.8",
"expo-speech": "~14.0.6",
"expo-splash-screen": "~31.0.12",
"expo-status-bar": "~3.0.9",
"expo-symbols": "~1.0.8",
"expo-system-ui": "~6.0.9",
"expo-web-browser": "~15.0.10",
"livekit-client": "^2.17.0",
"react": "19.1.0",
"react-dom": "19.1.0",
"react-native": "0.81.5",
@ -1552,6 +1554,15 @@
"integrity": "sha512-wJ8ReQbHxsAfXhrf9ixl0aYbZorRuOWpBNzm8pL8ftmSxQx/wnJD5Eg861NwJU/czy2VXFIebCeZnZrI9rktIQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
},
"node_modules/@config-plugins/react-native-webrtc": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/@config-plugins/react-native-webrtc/-/react-native-webrtc-13.0.0.tgz",
"integrity": "sha512-EtRRLXmsU4GcDA3TgIxtqg++eh/CjbI6EV8N/1EFQTtaWI2lpww0fg+S0wd+ndXE0dFWaLqUFvZuyTAaAoOSeA==",
"license": "MIT",
"peerDependencies": {
"expo": "^54"
}
},
"node_modules/@dr.pogodin/react-native-fs": {
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/@dr.pogodin/react-native-fs/-/react-native-fs-2.36.2.tgz",
@ -3002,18 +3013,6 @@
"node": ">=8"
}
},
"node_modules/@jamsch/expo-speech-recognition": {
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/@jamsch/expo-speech-recognition/-/expo-speech-recognition-0.2.15.tgz",
"integrity": "sha512-VzhR6a1bYnh8Yl704sBbvCmPqkZWzEggzl504myy6GKqQ90Ib+FQsz9FKI8RQbBXf8KHfhJVT3t0AxP6lYyyYw==",
"deprecated": "Package has moved to expo-speech-recognition",
"license": "MIT",
"peerDependencies": {
"expo": "*",
"react": "*",
"react-native": "*"
}
},
"node_modules/@jest/create-cache-key-function": {
"version": "29.7.0",
"resolved": "https://registry.npmjs.org/@jest/create-cache-key-function/-/create-cache-key-function-29.7.0.tgz",
@ -3246,6 +3245,18 @@
"react-native": "*"
}
},
"node_modules/@livekit/react-native-expo-plugin": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@livekit/react-native-expo-plugin/-/react-native-expo-plugin-1.0.1.tgz",
"integrity": "sha512-CSPjjzgDDlBH1ZyFyaw7/FW2Ql1S51eUkIxv/vjGwVshn+lUD6eQ9VgfUh7ha84itvjXi9X87FvP0XWKn9CiFQ==",
"license": "Apache-2.0",
"peerDependencies": {
"@livekit/react-native": "^2.1.0",
"expo": "*",
"react": "*",
"react-native": "*"
}
},
"node_modules/@livekit/react-native-webrtc": {
"version": "137.0.2",
"resolved": "https://registry.npmjs.org/@livekit/react-native-webrtc/-/react-native-webrtc-137.0.2.tgz",
@ -7374,15 +7385,6 @@
"node": ">=20.16.0"
}
},
"node_modules/expo-speech": {
"version": "14.0.8",
"resolved": "https://registry.npmjs.org/expo-speech/-/expo-speech-14.0.8.tgz",
"integrity": "sha512-UjBFCFv58nutlLw92L7kUS0ZjbOOfaTdiEv/HbjvMrT6BfldoOLLBZbaEcEhDdZK36NY/kass0Kzxk+co6vxSQ==",
"license": "MIT",
"peerDependencies": {
"expo": "*"
}
},
"node_modules/expo-splash-screen": {
"version": "31.0.12",
"resolved": "https://registry.npmjs.org/expo-splash-screen/-/expo-splash-screen-31.0.12.tgz",

View File

@ -11,9 +11,11 @@
"lint": "expo lint"
},
"dependencies": {
"@config-plugins/react-native-webrtc": "^13.0.0",
"@dr.pogodin/react-native-fs": "^2.36.2",
"@expo/vector-icons": "^15.0.3",
"@jamsch/expo-speech-recognition": "^0.2.15",
"@livekit/react-native": "^2.9.6",
"@livekit/react-native-expo-plugin": "^1.0.1",
"@notifee/react-native": "^9.1.8",
"@react-navigation/bottom-tabs": "^7.4.0",
"@react-navigation/elements": "^2.6.3",
@ -30,12 +32,12 @@
"expo-linking": "~8.0.10",
"expo-router": "~6.0.19",
"expo-secure-store": "^15.0.8",
"expo-speech": "~14.0.6",
"expo-splash-screen": "~31.0.12",
"expo-status-bar": "~3.0.9",
"expo-symbols": "~1.0.8",
"expo-system-ui": "~6.0.9",
"expo-web-browser": "~15.0.10",
"livekit-client": "^2.17.0",
"react": "19.1.0",
"react-dom": "19.1.0",
"react-native": "0.81.5",

View File

@ -3,6 +3,8 @@
*
* Ensures only ONE voice call can be active at a time per device.
* If a new call is started while another is active, the old one is disconnected first.
*
* This addresses the LiveKit concurrent agent jobs limit (5 per project).
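 *
 * Expected usage (illustrative sketch, mirroring how useLiveKitRoom calls it):
 *   await callManager.registerCall(callId, async () => { ...disconnect the active room... });
 *   callManager.unregisterCall(callId);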
*/
type DisconnectCallback = () => Promise<void>;

146
services/livekitService.ts Normal file
View File

@ -0,0 +1,146 @@
/**
* LiveKit Voice AI Service
* Connects to LiveKit Cloud with Julia AI agent
* Uses dedicated Julia Token Server for token generation
*/
// Julia Token Server (dedicated endpoint for LiveKit tokens)
const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';
// Voice configuration
export const VOICE_ID = 'Asteria';
export const VOICE_NAME = 'Asteria';
// ============================================================================
// SINGLE_DEPLOYMENT_MODE
// When true: sends only deploymentId (no beneficiaryNamesDict)
// When false: sends both deploymentId AND beneficiaryNamesDict
//
// Use true for WellNuo Lite (single beneficiary per user)
// Use false for full WellNuo app (multiple beneficiaries)
// ============================================================================
export const SINGLE_DEPLOYMENT_MODE = true;
// Beneficiary data to pass to voice agent
export interface BeneficiaryData {
deploymentId: string;
beneficiaryNamesDict: Record<string, string>;
}
// API Response types
export interface LiveKitTokenResponse {
success: boolean;
data?: {
token: string;
roomName: string;
wsUrl: string;
};
error?: string;
}
/**
* Get a LiveKit access token from Julia Token Server
* No authentication required - token server is dedicated for voice AI
* @param userId - User identifier
* @param beneficiaryData - Optional beneficiary data to pass to voice agent
*/
export async function getToken(
userId: string,
beneficiaryData?: BeneficiaryData
): Promise<LiveKitTokenResponse> {
try {
console.log('[LiveKit] Getting token for user:', userId);
console.log('[LiveKit] SINGLE_DEPLOYMENT_MODE:', SINGLE_DEPLOYMENT_MODE);
// Prepare request body based on SINGLE_DEPLOYMENT_MODE
let requestBody: { userId: string; beneficiaryData?: BeneficiaryData };
if (SINGLE_DEPLOYMENT_MODE && beneficiaryData) {
// In single deployment mode: send only deploymentId, no beneficiaryNamesDict
requestBody = {
userId,
beneficiaryData: {
deploymentId: beneficiaryData.deploymentId,
beneficiaryNamesDict: {}, // Empty - no list of names
},
};
console.log('[LiveKit] Single deployment mode - sending only deploymentId:', beneficiaryData.deploymentId);
} else {
// Full mode: send everything
requestBody = { userId, beneficiaryData };
if (beneficiaryData) {
console.log('[LiveKit] Full mode - sending beneficiary data:', beneficiaryData);
}
}
// Request LiveKit token from Julia Token Server
const response = await fetch(`${JULIA_TOKEN_SERVER}/token`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
});
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
console.error('[LiveKit] Token request failed:', response.status, errorData);
return {
success: false,
error: errorData.error || `Failed to get token: ${response.status}`,
};
}
const data = await response.json();
if (!data.success) {
return {
success: false,
error: data.error || 'Token generation failed',
};
}
console.log('[LiveKit] Token received:', {
room: data.data.roomName,
identity: data.data.identity,
url: data.data.wsUrl,
});
return {
success: true,
data: {
token: data.data.token,
roomName: data.data.roomName,
wsUrl: data.data.wsUrl,
},
};
} catch (error) {
console.error('[LiveKit] Get token error:', error);
return {
success: false,
error: error instanceof Error ? error.message : 'Failed to get token',
};
}
}
/**
* Check if LiveKit service is available
*/
export async function checkServerHealth(): Promise<boolean> {
try {
const response = await fetch(`${JULIA_TOKEN_SERVER}/health`, {
method: 'GET',
});
if (response.ok) {
const data = await response.json();
console.log('[LiveKit] Health check:', data);
return data.status === 'ok';
}
return false;
} catch (error) {
console.error('[LiveKit] Health check failed:', error);
return false;
}
}

View File

@ -0,0 +1,336 @@
# FEATURE-002: LiveKit Voice Call with Julia AI
## Summary
A full-featured voice call with Julia AI via LiveKit Cloud. The user taps the "Start Voice Call" button, a phone-style call screen opens, and they can talk to Julia AI by voice.
## Status: 🔴 Not Started (full rework required)
## Priority: Critical
## Problem Statement
The current implementation has the following problems:
1. **STT (Speech-to-Text) is unreliable**: the microphone is sometimes detected, sometimes not
2. **TTS works**: Julia's voice is audible
3. **The code is complex and tangled**: lots of legacy code, polyfills, and hacks
4. **No clear architecture**: everything lives in one file, voice-call.tsx
## Root Cause Analysis
### Why the microphone is unreliable:
1. **iOS AudioSession**: misconfiguration or a race condition during setup
2. **registerGlobals()**: WebRTC polyfills may not be initialized in time
3. **Permissions**: the microphone may not be granted, or may be held by another process
4. **Event handling**: LiveKit events may get lost
### What works:
- LiveKit Cloud connection ✅
- Token generation ✅
- TTS (Deepgram Asteria) ✅
- Backend agent (Julia AI) ✅
---
## Architecture
### System Overview
```
┌─────────────────────────────────────────────────────────────────────┐
│ WellNuo Lite App (iOS) │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────────┐ ┌──────────────────┐ │
│ │ Voice Tab │───▶│ VoiceCallScreen │───▶│ LiveKit Room │ │
│ │ (entry) │ │ (fullscreen) │ │ (WebRTC) │ │
│ └──────────────┘ └──────────────────┘ └──────────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │useLiveKitRoom│ │ AudioSession │ │
│ │ (hook) │ │ (iOS native) │ │
│ └──────────────┘ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
│ WebSocket + WebRTC
┌─────────────────────────────────────────────────────────────────────┐
│ LiveKit Cloud │
├─────────────────────────────────────────────────────────────────────┤
│ Room: wellnuo-{userId}-{timestamp} │
│ Participants: user + julia-agent │
│ Audio Tracks: bidirectional │
└─────────────────────────────────────────────────────────────────────┘
│ Agent dispatch
┌─────────────────────────────────────────────────────────────────────┐
│ Julia AI Agent (Python) │
├─────────────────────────────────────────────────────────────────────┤
│ STT: Deepgram Nova-2 │
│ LLM: WellNuo voice_ask API │
│ TTS: Deepgram Aura Asteria │
│ Framework: LiveKit Agents SDK 1.3.11 │
└─────────────────────────────────────────────────────────────────────┘
```
### Data Flow
```
User speaks → iOS Mic → WebRTC → LiveKit Cloud → Agent → Deepgram STT
                                    ↓
                            WellNuo API (LLM)
                                    ↓
Agent receives text ← LiveKit Cloud ← WebRTC ← Deepgram TTS (audio)
                                    ↓
                    iOS Speaker → User hears Julia
```
---
## Technical Requirements
### Dependencies (package.json)
```json
{
"@livekit/react-native": "^2.x",
"livekit-client": "^2.x",
"expo-keep-awake": "^14.x"
}
```
### iOS Permissions (app.json)
```json
{
"ios": {
"infoPlist": {
"NSMicrophoneUsageDescription": "WellNuo needs microphone access for voice calls with Julia AI",
"UIBackgroundModes": ["audio", "voip"]
}
}
}
```
### Token Server (already exists)
- **URL**: `https://wellnuo.smartlaunchhub.com/julia/token`
- **Method**: POST
- **Body**: `{ "userId": "string" }`
- **Response**: `{ "success": true, "data": { "token", "roomName", "wsUrl" } }`
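A minimal fetch against this endpoint looks like the sketch below; `services/livekitService.ts` is the real implementation and adds logging plus the optional `beneficiaryData` payload.
```typescript
async function fetchJuliaToken(userId: string) {
  const res = await fetch('https://wellnuo.smartlaunchhub.com/julia/token', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ userId }),
  });
  const json = await res.json();
  if (!res.ok || !json.success) {
    throw new Error(json.error || `Token request failed: ${res.status}`);
  }
  return json.data as { token: string; roomName: string; wsUrl: string };
}
```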
---
## Implementation Steps
### Phase 1: Cleanup (DELETE old code)
- [ ] 1.1. Delete `app/voice-call.tsx` (current broken implementation)
- [ ] 1.2. Keep `app/(tabs)/voice.tsx` (entry point) but simplify
- [ ] 1.3. Keep `services/livekitService.ts` (token fetching)
- [ ] 1.4. Keep `contexts/VoiceTranscriptContext.tsx` (transcript storage)
- [ ] 1.5. Delete `components/VoiceIndicator.tsx` (unused)
- [ ] 1.6. Delete `polyfills/livekit-globals.ts` (not needed with proper setup)
### Phase 2: New Architecture
- [ ] 2.1. Create `hooks/useLiveKitRoom.ts` — encapsulate all LiveKit logic
- [ ] 2.2. Create `app/voice-call.tsx` — simple UI component using the hook
- [ ] 2.3. Create `utils/audioSession.ts` — iOS AudioSession helper
### Phase 3: useLiveKitRoom Hook
**File**: `hooks/useLiveKitRoom.ts`
```typescript
interface UseLiveKitRoomOptions {
userId: string;
onTranscript?: (role: 'user' | 'assistant', text: string) => void;
}
interface UseLiveKitRoomReturn {
// Connection state
state: 'idle' | 'connecting' | 'connected' | 'reconnecting' | 'disconnected' | 'error';
error: string | null;
// Call info
roomName: string | null;
callDuration: number; // seconds
// Audio state
isMuted: boolean;
isSpeaking: boolean; // agent is speaking
// Actions
connect: () => Promise<void>;
disconnect: () => Promise<void>;
toggleMute: () => void;
}
```
**Implementation requirements**:
1. MUST call `registerGlobals()` BEFORE importing `livekit-client`
2. MUST configure iOS AudioSession BEFORE connecting to room
3. MUST handle all RoomEvents properly
4. MUST cleanup on unmount (disconnect, stop audio session)
5. MUST handle background/foreground transitions
### Phase 4: iOS AudioSession Configuration
**Critical for microphone to work!**
```typescript
// utils/audioSession.ts
import { AudioSession } from '@livekit/react-native';
import { Platform } from 'react-native';
export async function configureAudioForVoiceCall(): Promise<void> {
if (Platform.OS !== 'ios') return;
// Step 1: Set Apple audio configuration
await AudioSession.setAppleAudioConfiguration({
audioCategory: 'playAndRecord',
audioCategoryOptions: [
'allowBluetooth',
'allowBluetoothA2DP',
'defaultToSpeaker',
'mixWithOthers',
],
audioMode: 'voiceChat',
});
// Step 2: Configure output
await AudioSession.configureAudio({
ios: {
defaultOutput: 'speaker',
},
});
// Step 3: Start session
await AudioSession.startAudioSession();
}
export async function stopAudioSession(): Promise<void> {
if (Platform.OS !== 'ios') return;
await AudioSession.stopAudioSession();
}
```
### Phase 5: Voice Call Screen UI
**File**: `app/voice-call.tsx`
Simple, clean UI:
- Avatar with Julia "J" letter
- Call duration timer
- Status text (Connecting... / Connected / Julia is speaking...)
- Mute button
- End call button
- Debug logs toggle (for development)
**NO complex logic in this file** — all LiveKit logic in the hook!
### Phase 6: Testing Checklist
- [ ] 6.1. Fresh app launch → Start call → Can hear Julia greeting
- [ ] 6.2. Speak → Julia responds → Conversation works
- [ ] 6.3. Mute → Unmute → Still works
- [ ] 6.4. End call → Clean disconnect
- [ ] 6.5. App to background → Audio continues
- [ ] 6.6. App to foreground → Still connected
- [ ] 6.7. Multiple calls in a row → No memory leaks
- [ ] 6.8. No microphone permission → Shows error
---
## Files to Create/Modify
| File | Action | Description |
|------|--------|-------------|
| `hooks/useLiveKitRoom.ts` | CREATE | Main LiveKit hook with all logic |
| `utils/audioSession.ts` | CREATE | iOS AudioSession helpers |
| `app/voice-call.tsx` | REPLACE | Simple UI using the hook |
| `app/(tabs)/voice.tsx` | SIMPLIFY | Just entry point, remove debug UI |
| `services/livekitService.ts` | KEEP | Token fetching (already works) |
| `contexts/VoiceTranscriptContext.tsx` | KEEP | Transcript storage |
| `components/VoiceIndicator.tsx` | DELETE | Not needed |
| `polyfills/livekit-globals.ts` | DELETE | Not needed |
---
## Key Principles
### 1. Separation of Concerns
- **Hook** handles ALL LiveKit/WebRTC logic
- **Screen** only renders UI based on hook state
- **Utils** for platform-specific code (AudioSession)
### 2. Proper Initialization Order
```
1. registerGlobals() — WebRTC polyfills
2. configureAudioForVoiceCall() — iOS audio
3. getToken() — fetch from server
4. room.connect() — connect to LiveKit
5. room.localParticipant.setMicrophoneEnabled(true) — enable mic
```
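The same sequence as a compact sketch; the real `connect()` in `useLiveKitRoom` adds cancellation guards, event listeners, and per-step logging between these calls:
```typescript
import { getToken } from '@/services/livekitService';
import { configureAudioForVoiceCall } from '@/utils/audioSession';

async function startCall(userId: string) {
  const { registerGlobals } = await import('@livekit/react-native');
  registerGlobals();                                        // 1. WebRTC polyfills

  await configureAudioForVoiceCall();                       // 2. iOS audio session

  const result = await getToken(userId);                    // 3. token from server
  if (!result.success || !result.data) throw new Error(result.error ?? 'No token');

  const { Room } = await import('livekit-client');
  const room = new Room();
  await room.connect(result.data.wsUrl, result.data.token); // 4. connect to LiveKit
  await room.startAudio();                                  // required for mobile playback

  await room.localParticipant.setMicrophoneEnabled(true);   // 5. enable mic
  return room;
}
```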
### 3. Proper Cleanup Order
```
1. room.disconnect() — leave room
2. stopAudioSession() — release iOS audio
3. Clear all refs and state
```
### 4. Error Handling
- Every async operation wrapped in try/catch
- User-friendly error messages
- Automatic retry for network issues
- Graceful degradation
---
## Success Criteria
1. ✅ User can start voice call and hear Julia greeting
2. ✅ User can speak and Julia understands (STT works reliably)
3. ✅ Julia responds with voice (TTS works)
4. ✅ Conversation can continue back and forth
5. ✅ Mute/unmute works
6. ✅ End call cleanly disconnects
7. ✅ No console errors or warnings
8. ✅ Works on iOS device (not just simulator)
---
## Related Links
- [LiveKit React Native SDK](https://docs.livekit.io/client-sdk-js/react-native/)
- [LiveKit Agents Python](https://docs.livekit.io/agents/)
- [Deepgram STT/TTS](https://deepgram.com/)
- [iOS AVAudioSession](https://developer.apple.com/documentation/avfaudio/avaudiosession)
---
## Notes
### Why previous approach failed:
1. **Too much code in one file** — voice-call.tsx had 900+ lines with all logic mixed
2. **Polyfills applied wrong** — Event class polyfill was inside the component
3. **AudioSession configured too late** — sometimes after connect() already started
4. **No proper error boundaries** — errors silently failed
5. **Race conditions** — multiple async operations without proper sequencing
### What's different this time:
1. **Hook-based architecture** — single source of truth for state
2. **Proper initialization sequence** — documented and enforced
3. **Clean separation** — UI knows nothing about WebRTC
4. **Comprehensive logging** — every step logged for debugging
5. **Test-driven** — write tests before implementation

373
utils/audioSession.ts Normal file
View File

@ -0,0 +1,373 @@
/**
* Audio Session Configuration Helpers (iOS + Android)
*
* CRITICAL: This must be configured BEFORE connecting to LiveKit room!
* Without proper AudioSession setup, microphone won't work on iOS.
* On Android, this controls speaker/earpiece routing.
*/
import { Platform } from 'react-native';
/**
* Represents an available audio output device
*/
export interface AudioOutputDevice {
id: string;
name: string;
type: 'speaker' | 'earpiece' | 'bluetooth' | 'headphones' | 'unknown';
}
// AudioSession module - use 'any' to avoid complex typing issues with @livekit/react-native
// The actual AudioSession from LiveKit has specific enum types that are hard to match statically
let audioSessionModule: any = null;
/**
* Import AudioSession module lazily
* This is needed because @livekit/react-native must be imported after registerGlobals()
*/
async function getAudioSession(): Promise<any | null> {
if (!audioSessionModule) {
const livekit = await import('@livekit/react-native');
audioSessionModule = livekit.AudioSession;
}
return audioSessionModule;
}
/**
* Configure AudioSession for bidirectional voice call (iOS + Android)
*
* MUST be called BEFORE connecting to LiveKit room!
*
* iOS Configuration:
* - Category: playAndRecord (both speaker and mic)
* - Mode: voiceChat (optimized for voice calls)
* - Options: Bluetooth, speaker, mix with others
*
* Android Configuration:
* - audioTypeOptions: communication (for voice calls)
* - forceHandleAudioRouting: true (to control speaker/earpiece)
*/
export async function configureAudioForVoiceCall(): Promise<void> {
console.log(`[AudioSession] Configuring for voice call on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
console.error('[AudioSession] Failed to get AudioSession module');
return;
}
if (Platform.OS === 'ios') {
// iOS-specific configuration - FORCE SPEAKER OUTPUT
// Using videoChat mode + the defaultToSpeaker option for guaranteed speaker output
console.log('[AudioSession] Configuring iOS for SPEAKER output...');
try {
// Primary config: videoChat mode with defaultSpeakerOutput
await AudioSession.setAppleAudioConfiguration({
audioCategory: 'playAndRecord',
audioCategoryOptions: [
'allowBluetooth',
'mixWithOthers',
'defaultToSpeaker', // KEY: Forces speaker as default output
],
audioMode: 'videoChat', // videoChat mode uses speaker by default
});
console.log('[AudioSession] iOS videoChat + defaultToSpeaker configured!');
} catch (err) {
console.warn('[AudioSession] Primary iOS config failed, trying fallback:', err);
// Fallback: just videoChat without defaultToSpeaker option
await AudioSession.setAppleAudioConfiguration({
audioCategory: 'playAndRecord',
audioCategoryOptions: ['allowBluetooth', 'mixWithOthers'],
audioMode: 'videoChat',
});
}
console.log('[AudioSession] Starting iOS audio session...');
await AudioSession.startAudioSession();
// Additionally set default output to speaker (belt and suspenders)
try {
console.log('[AudioSession] Setting iOS default output to speaker...');
await AudioSession.configureAudio({
ios: {
defaultOutput: 'speaker',
},
});
console.log('[AudioSession] iOS speaker output set!');
} catch (outputErr) {
console.warn('[AudioSession] Could not set speaker output:', outputErr);
}
} else if (Platform.OS === 'android') {
// Android-specific configuration - FORCE SPEAKER OUTPUT
// CRITICAL: Use 'inCommunication' mode + 'music' stream for speaker
// Many Android devices default to earpiece for voice calls
console.log('[AudioSession] Configuring Android audio for SPEAKER...');
await AudioSession.configureAudio({
android: {
// Use inCommunication mode but with music stream for speaker
audioTypeOptions: {
manageAudioFocus: true,
// inCommunication gives us more control over audio routing
audioMode: 'inCommunication',
audioFocusMode: 'gain',
// Use 'music' stream - goes to speaker by default!
audioStreamType: 'music',
audioAttributesUsageType: 'media',
audioAttributesContentType: 'music',
},
// Force speaker as output
preferredOutputList: ['speaker'],
// Allow us to control audio routing
forceHandleAudioRouting: true,
},
});
console.log('[AudioSession] Starting Android audio session...');
await AudioSession.startAudioSession();
// Best effort: surface the system audio route picker if the API exists.
// This does not force the speaker by itself; routing comes from the config above.
console.log('[AudioSession] Offering audio route picker (best effort)...');
try {
await AudioSession.showAudioRoutePicker?.();
} catch {
// showAudioRoutePicker may not be available, that's ok
}
console.log('[AudioSession] Android speaker mode configured!');
}
console.log('[AudioSession] Configuration complete!');
} catch (error) {
console.error('[AudioSession] Configuration error:', error);
throw error;
}
}
/**
* Stop AudioSession (iOS + Android)
*
* Should be called when disconnecting from voice call
*/
export async function stopAudioSession(): Promise<void> {
if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
return;
}
console.log(`[AudioSession] Stopping audio session on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
return;
}
await AudioSession.stopAudioSession();
console.log('[AudioSession] Stopped');
} catch (error) {
console.error('[AudioSession] Error stopping:', error);
// Don't throw - cleanup errors are not critical
}
}
/**
* Reconfigure audio session after remote track arrives (iOS + Android)
*
* Sometimes the OS needs a kick to properly route audio after remote participant joins
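*
* @example
* // Sketch: re-assert speaker routing once the agent's audio track arrives
* // (RoomEvent.TrackSubscribed comes from livekit-client)
* room.on(RoomEvent.TrackSubscribed, () => { void reconfigureAudioForPlayback(); });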
*/
export async function reconfigureAudioForPlayback(): Promise<void> {
if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
return;
}
console.log(`[AudioSession] Reconfiguring for playback (SPEAKER) on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
return;
}
if (Platform.OS === 'ios') {
// Reconfigure iOS - force speaker output
await AudioSession.setAppleAudioConfiguration({
audioCategory: 'playAndRecord',
audioCategoryOptions: [
'allowBluetooth',
'mixWithOthers',
'defaultToSpeaker', // Force speaker
],
audioMode: 'videoChat', // videoChat = speaker by default
});
// Also set default output to speaker
await AudioSession.configureAudio({
ios: {
defaultOutput: 'speaker',
},
});
console.log('[AudioSession] iOS reconfigured for speaker playback');
} else if (Platform.OS === 'android') {
// Reconfigure Android audio to ensure speaker output
// Using inCommunication + music stream for reliable speaker routing
await AudioSession.configureAudio({
android: {
audioTypeOptions: {
manageAudioFocus: true,
audioMode: 'inCommunication',
audioFocusMode: 'gain',
audioStreamType: 'music',
audioAttributesUsageType: 'media',
audioAttributesContentType: 'music',
},
preferredOutputList: ['speaker'],
forceHandleAudioRouting: true,
},
});
console.log('[AudioSession] Android reconfigured for speaker playback');
}
console.log('[AudioSession] Reconfigured successfully');
} catch (error) {
console.error('[AudioSession] Reconfigure error:', error);
// Don't throw - this is a best-effort operation
}
}
/**
* Get list of available audio output devices
*
* @returns Array of available audio output devices
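*
* @example
* // Sketch: prefer a bluetooth route when one is available
* const outputs = await getAvailableAudioOutputs();
* const bt = outputs.find((o) => o.type === 'bluetooth');
* if (bt) await selectAudioOutput(bt.id);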
*/
export async function getAvailableAudioOutputs(): Promise<AudioOutputDevice[]> {
console.log(`[AudioSession] Getting available audio outputs on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
console.error('[AudioSession] Failed to get AudioSession module');
return [];
}
const outputs = await AudioSession.getAudioOutputs();
console.log('[AudioSession] Available outputs:', outputs);
// Map the raw outputs to our AudioOutputDevice interface
if (Array.isArray(outputs)) {
return outputs.map((output: any) => ({
id: output.id || output.deviceId || String(output),
name: output.name || output.deviceName || String(output),
type: mapDeviceType(output.type || output.deviceType),
}));
}
return [];
} catch (error) {
console.error('[AudioSession] getAvailableAudioOutputs error:', error);
return [];
}
}
/**
* Select a specific audio output device by ID
*
* @param deviceId - The ID of the device to select
*/
export async function selectAudioOutput(deviceId: string): Promise<void> {
console.log(`[AudioSession] Selecting audio output: ${deviceId} on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
console.error('[AudioSession] Failed to get AudioSession module');
return;
}
await AudioSession.selectAudioOutput(deviceId);
console.log(`[AudioSession] Audio output selected: ${deviceId}`);
} catch (error) {
console.error('[AudioSession] selectAudioOutput error:', error);
}
}
/**
* Map raw device type to our AudioOutputDevice type
*/
function mapDeviceType(rawType: string | undefined): AudioOutputDevice['type'] {
if (!rawType) return 'unknown';
const type = rawType.toLowerCase();
if (type.includes('speaker')) return 'speaker';
if (type.includes('earpiece') || type.includes('receiver')) return 'earpiece';
if (type.includes('bluetooth')) return 'bluetooth';
if (type.includes('headphone') || type.includes('headset') || type.includes('wired')) return 'headphones';
return 'unknown';
}
/**
* Switch audio output between speaker and earpiece (iOS + Android)
*
* @param useSpeaker - true for speaker, false for earpiece
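*
* @example
* await setAudioOutput(true);  // route audio to the loudspeaker
* await setAudioOutput(false); // route audio to the earpiece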
*/
export async function setAudioOutput(useSpeaker: boolean): Promise<void> {
console.log(`[AudioSession] Setting audio output to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'} on ${Platform.OS}...`);
try {
const AudioSession = await getAudioSession();
if (!AudioSession) {
console.error('[AudioSession] Failed to get AudioSession module');
return;
}
if (Platform.OS === 'ios') {
// iOS: Use videoChat mode + defaultToSpeaker for speaker, voiceChat for earpiece
await AudioSession.setAppleAudioConfiguration({
audioCategory: 'playAndRecord',
audioCategoryOptions: useSpeaker
? ['allowBluetooth', 'mixWithOthers', 'defaultToSpeaker']
: ['allowBluetooth', 'mixWithOthers'],
audioMode: useSpeaker ? 'videoChat' : 'voiceChat',
});
// Also set default output
await AudioSession.configureAudio({
ios: {
defaultOutput: useSpeaker ? 'speaker' : 'earpiece',
},
});
} else if (Platform.OS === 'android') {
// Android: Switch stream type to control speaker/earpiece
// - 'music' stream goes to speaker by default
// - 'voiceCall' stream goes to earpiece by default
await AudioSession.configureAudio({
android: {
audioTypeOptions: {
manageAudioFocus: true,
audioMode: useSpeaker ? 'normal' : 'inCommunication',
audioFocusMode: 'gain',
// Key difference: music→speaker, voiceCall→earpiece
audioStreamType: useSpeaker ? 'music' : 'voiceCall',
audioAttributesUsageType: useSpeaker ? 'media' : 'voiceCommunication',
audioAttributesContentType: useSpeaker ? 'music' : 'speech',
},
// Also set preferred output list
preferredOutputList: useSpeaker ? ['speaker'] : ['earpiece'],
forceHandleAudioRouting: true,
},
});
}
console.log(`[AudioSession] Audio output set to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'}`);
} catch (error) {
console.error('[AudioSession] setAudioOutput error:', error);
}
}