Improve voice call UX and disable agent interruption

Chat improvements:
- Add pulsing animation to voice call button during active call
- Log call start/end with duration to chat history
- End call automatically when deployment ID changes
- Reduce bottom padding (removed SafeArea bottom edge)

Julia Agent:
- Disable user interruption (min_interruption_duration=999)
- Agent now speaks without being interrupted

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Sergei 2026-01-24 21:51:20 -08:00
parent 5d2e8c029f
commit ad0fe41ee9
5 changed files with 228 additions and 300 deletions

View File

@ -17,6 +17,7 @@ import {
Keyboard,
Platform,
Alert,
Animated,
} from 'react-native';
import { KeyboardAvoidingView } from 'react-native-keyboard-controller';
import { Ionicons } from '@expo/vector-icons';
@ -36,12 +37,10 @@ import {
LiveKitRoom,
useVoiceAssistant,
useConnectionState,
useRoomContext,
BarVisualizer,
useTrackTranscription,
useTracks,
} from '@livekit/react-native';
import { ConnectionState, RoomEvent, Track, TranscriptionSegment } from 'livekit-client';
import { ConnectionState, Track } from 'livekit-client';
import { getToken, type BeneficiaryData } from '@/services/livekitService';
import { useAuth } from '@/contexts/AuthContext';
@ -128,21 +127,17 @@ function normalizeQuestion(userMessage: string): string {
}
// ============================================================================
// Voice Call Overlay Component
// Voice Call Transcript Handler (invisible - captures transcripts and reports call duration)
// ============================================================================
interface VoiceCallOverlayProps {
onHangUp: () => void;
onMinimize: () => void;
interface VoiceCallTranscriptHandlerProps {
onTranscript: (role: 'user' | 'assistant', text: string) => void;
onDurationUpdate: (seconds: number) => void;
beneficiaryName?: string;
}
function VoiceCallContent({ onHangUp, onMinimize, onTranscript, onDurationUpdate, beneficiaryName }: VoiceCallOverlayProps) {
const room = useRoomContext();
function VoiceCallTranscriptHandler({ onTranscript, onDurationUpdate }: VoiceCallTranscriptHandlerProps) {
const connectionState = useConnectionState();
const { state: agentState, audioTrack } = useVoiceAssistant();
const { audioTrack } = useVoiceAssistant();
const [callDuration, setCallDuration] = useState(0);
const [lastProcessedId, setLastProcessedId] = useState<string | null>(null);
@ -181,15 +176,13 @@ function VoiceCallContent({ onHangUp, onMinimize, onTranscript, onDurationUpdate
}
}, [userSegments, lastUserSegmentId, onTranscript]);
// Call duration timer
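// Call duration timer - keep the counter in a ref so onDurationUpdate is called from the
// interval callback itself, not from inside a state updater (updaters must stay side-effect free)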
const durationRef = useRef(0);
useEffect(() => {
if (connectionState === ConnectionState.Connected) {
const interval = setInterval(() => {
setCallDuration(prev => {
const newDuration = prev + 1;
onDurationUpdate(newDuration);
return newDuration;
});
durationRef.current += 1;
onDurationUpdate(durationRef.current);
}, 1000);
return () => clearInterval(interval);
}
@ -203,187 +196,10 @@ function VoiceCallContent({ onHangUp, onMinimize, onTranscript, onDurationUpdate
};
}, []);
// Format duration as mm:ss
const formatDuration = (seconds: number) => {
const mins = Math.floor(seconds / 60);
const secs = seconds % 60;
return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
};
// Get status text based on agent state
const getStatusText = () => {
if (connectionState === ConnectionState.Connecting) return 'Connecting...';
if (connectionState === ConnectionState.Reconnecting) return 'Reconnecting...';
if (connectionState !== ConnectionState.Connected) return 'Disconnected';
switch (agentState) {
case 'listening': return 'Listening...';
case 'thinking': return 'Thinking...';
case 'speaking': return 'Speaking...';
case 'connecting': return 'Connecting to Julia...';
case 'initializing': return 'Starting...';
default: return 'Connected';
}
};
return (
<View style={voiceStyles.container}>
<View style={voiceStyles.content}>
{/* Avatar */}
<View style={voiceStyles.avatarContainer}>
<View style={[
voiceStyles.avatar,
agentState === 'speaking' && voiceStyles.avatarSpeaking,
]}>
<Text style={voiceStyles.avatarText}>J</Text>
</View>
{agentState === 'speaking' && (
<View style={voiceStyles.speakingRing} />
)}
</View>
{/* Name and status */}
<Text style={voiceStyles.name}>Julia AI</Text>
{beneficiaryName && (
<Text style={voiceStyles.beneficiary}>About {beneficiaryName}</Text>
)}
<Text style={voiceStyles.status}>{getStatusText()}</Text>
{/* Duration */}
{connectionState === ConnectionState.Connected && (
<Text style={voiceStyles.duration}>{formatDuration(callDuration)}</Text>
)}
{/* Audio Visualizer */}
{audioTrack && agentState === 'speaking' && (
<View style={voiceStyles.visualizerContainer}>
<BarVisualizer
trackRef={{ participant: audioTrack.participant, source: Track.Source.Microphone, publication: audioTrack.publication }}
barCount={5}
options={{ minHeight: 10 }}
/>
</View>
)}
</View>
{/* Call controls */}
<View style={voiceStyles.callControls}>
{/* Minimize button */}
<TouchableOpacity style={voiceStyles.minimizeButton} onPress={onMinimize}>
<Ionicons name="chevron-down" size={28} color={AppColors.white} />
</TouchableOpacity>
{/* Hang up button */}
<TouchableOpacity style={voiceStyles.hangUpButton} onPress={onHangUp}>
<Ionicons name="call" size={32} color={AppColors.white} style={{ transform: [{ rotate: '135deg' }] }} />
</TouchableOpacity>
{/* Placeholder for symmetry */}
<View style={voiceStyles.controlPlaceholder} />
</View>
</View>
);
// This component renders nothing - it just handles transcripts
return null;
}
const voiceStyles = StyleSheet.create({
container: {
flex: 1,
backgroundColor: 'rgba(0, 0, 0, 0.95)',
justifyContent: 'space-between',
alignItems: 'center',
paddingVertical: 60,
},
content: {
flex: 1,
justifyContent: 'center',
alignItems: 'center',
},
avatarContainer: {
position: 'relative',
marginBottom: Spacing.lg,
},
avatar: {
width: 120,
height: 120,
borderRadius: 60,
backgroundColor: AppColors.success,
justifyContent: 'center',
alignItems: 'center',
},
avatarSpeaking: {
backgroundColor: AppColors.primary,
},
avatarText: {
fontSize: 48,
fontWeight: '600',
color: AppColors.white,
},
speakingRing: {
position: 'absolute',
top: -10,
left: -10,
right: -10,
bottom: -10,
borderRadius: 70,
borderWidth: 3,
borderColor: AppColors.primary,
opacity: 0.5,
},
name: {
fontSize: FontSizes['2xl'],
fontWeight: '600',
color: AppColors.white,
marginBottom: Spacing.xs,
},
beneficiary: {
fontSize: FontSizes.base,
color: 'rgba(255, 255, 255, 0.7)',
marginBottom: Spacing.sm,
},
status: {
fontSize: FontSizes.base,
color: AppColors.success,
marginBottom: Spacing.md,
},
duration: {
fontSize: FontSizes.lg,
color: 'rgba(255, 255, 255, 0.8)',
fontVariant: ['tabular-nums'],
},
visualizerContainer: {
marginTop: Spacing.xl,
height: 60,
width: 200,
},
callControls: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'center',
gap: Spacing.xl,
marginBottom: Spacing.xl,
},
minimizeButton: {
width: 56,
height: 56,
borderRadius: 28,
backgroundColor: 'rgba(255, 255, 255, 0.2)',
justifyContent: 'center',
alignItems: 'center',
},
hangUpButton: {
width: 72,
height: 72,
borderRadius: 36,
backgroundColor: AppColors.error,
justifyContent: 'center',
alignItems: 'center',
},
controlPlaceholder: {
width: 56,
height: 56,
},
});
export default function ChatScreen() {
const router = useRouter();
const { currentBeneficiary, setCurrentBeneficiary } = useBeneficiary();
@ -399,20 +215,51 @@ export default function ChatScreen() {
isCallActive,
} = useVoiceCall();
// Chat state
const [messages, setMessages] = useState<Message[]>([
{
// Helper to create initial message with deployment ID
const createInitialMessage = useCallback((deploymentId?: string | null): Message => ({
id: '1',
role: 'assistant',
content: 'Hello! I\'m Julia, your AI wellness assistant. You can type a message or tap the phone button to start a voice call.',
content: `Hello! I'm Julia, your AI wellness companion.${deploymentId ? `\n\nDeployment ID: ${deploymentId}` : ''}\n\nTap the phone button to start a voice call, or type a message below.`,
timestamp: new Date(),
},
]);
}), []);
// Custom deployment ID from settings
const [customDeploymentId, setCustomDeploymentId] = useState<string | null>(null);
// Chat state - starts with a generic greeting; replaced once the deployment ID has been loaded
const [messages, setMessages] = useState<Message[]>([createInitialMessage(null)]);
const [sortNewestFirst, setSortNewestFirst] = useState(false);
// Voice call state (local connecting state only)
const [isConnectingVoice, setIsConnectingVoice] = useState(false);
// Pulsing animation for active call
const pulseAnim = useRef(new Animated.Value(1)).current;
// Start pulsing animation when call is active
useEffect(() => {
if (isCallActive) {
const pulse = Animated.loop(
Animated.sequence([
Animated.timing(pulseAnim, {
toValue: 1.15,
duration: 600,
useNativeDriver: true,
}),
Animated.timing(pulseAnim, {
toValue: 1,
duration: 600,
useNativeDriver: true,
}),
])
);
pulse.start();
return () => pulse.stop();
} else {
pulseAnim.setValue(1);
}
}, [isCallActive, pulseAnim]);
// Track if we've shown the voice call separator for current call
const [hasShownVoiceSeparator, setHasShownVoiceSeparator] = useState(false);
@ -433,17 +280,40 @@ export default function ChatScreen() {
const [beneficiaries, setBeneficiaries] = useState<Beneficiary[]>([]);
const [loadingBeneficiaries, setLoadingBeneficiaries] = useState(false);
// Custom deployment ID from settings
const [customDeploymentId, setCustomDeploymentId] = useState<string | null>(null);
// Load custom deployment ID from settings
// Load custom deployment ID from settings and update initial message
useEffect(() => {
const loadCustomDeploymentId = async () => {
const saved = await api.getDeploymentId();
setCustomDeploymentId(saved);
// Update initial message with deployment ID
if (saved) {
setMessages([createInitialMessage(saved)]);
}
};
loadCustomDeploymentId();
}, []);
}, [createInitialMessage]);
// When deployment ID changes, end call and clear chat
const previousDeploymentId = useRef<string | null>(null);
useEffect(() => {
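// Skip initial load: null doubles as the "not loaded yet" sentinel, so the first non-null ID
// is only recorded. NOTE(review): the first change after a reset to null is skipped as well.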
if (previousDeploymentId.current === null) {
previousDeploymentId.current = customDeploymentId;
return;
}
// If deployment ID actually changed
if (previousDeploymentId.current !== customDeploymentId) {
console.log('[Chat] Deployment ID changed, ending call and clearing chat');
// End any active call
if (isCallActive) {
endVoiceCallContext();
}
// Clear chat with new initial message
setMessages([createInitialMessage(customDeploymentId)]);
setHasShownVoiceSeparator(false);
previousDeploymentId.current = customDeploymentId;
}
}, [customDeploymentId, createInitialMessage, isCallActive, endVoiceCallContext]);
// Load beneficiaries
const loadBeneficiaries = useCallback(async () => {
@ -546,6 +416,16 @@ export default function ChatScreen() {
console.log('[Chat] Got voice token, connecting to room:', tokenResponse.data.roomName);
// Add call start message to chat
const callStartMessage: Message = {
id: `call-start-${Date.now()}`,
role: 'assistant',
content: 'Voice call started',
timestamp: new Date(),
isSystem: true,
};
setMessages(prev => [...prev, callStartMessage]);
// Clear previous transcript and start call via context
clearTranscript();
startCall({
@ -565,29 +445,33 @@ export default function ChatScreen() {
}
}, [isConnectingVoice, isCallActive, currentBeneficiary, beneficiaries, user, clearTranscript, startCall, customDeploymentId]);
// End voice call
// End voice call and log to chat
const endVoiceCall = useCallback(() => {
console.log('[Chat] Ending voice call...');
// Add call end message to chat with duration
const duration = callState.callDuration;
const minutes = Math.floor(duration / 60);
const seconds = duration % 60;
const durationStr = `${minutes}:${seconds.toString().padStart(2, '0')}`;
const callEndMessage: Message = {
id: `call-end-${Date.now()}`,
role: 'assistant',
content: `Call ended (${durationStr})`,
timestamp: new Date(),
isSystem: true,
};
setMessages(prev => [...prev, callEndMessage]);
setHasShownVoiceSeparator(false);
endVoiceCallContext();
}, [endVoiceCallContext]);
}, [endVoiceCallContext, callState.callDuration]);
// Handle voice transcript entries - add to chat in real-time
const handleVoiceTranscript = useCallback((role: 'user' | 'assistant', text: string) => {
if (!text.trim()) return;
// Add separator before first voice message of this call
if (!hasShownVoiceSeparator) {
const separatorMessage: Message = {
id: `voice-separator-${Date.now()}`,
role: 'assistant',
content: 'Voice Call',
timestamp: new Date(),
isSystem: true,
};
setMessages(prev => [...prev, separatorMessage]);
setHasShownVoiceSeparator(true);
}
// Create voice message and add to chat immediately
const voiceMessage: Message = {
id: `voice-${Date.now()}-${Math.random().toString(36).slice(2)}`,
@ -772,7 +656,7 @@ export default function ChatScreen() {
};
return (
<SafeAreaView style={styles.container} edges={['top', 'bottom']}>
<SafeAreaView style={styles.container} edges={['top']}>
{/* Header */}
<View style={styles.header}>
<TouchableOpacity style={styles.backButton} onPress={() => router.push('/(tabs)')}>
@ -904,23 +788,37 @@ export default function ChatScreen() {
{/* Input */}
<View style={styles.inputContainer}>
{/* Voice Call Button */}
{/* Voice Call Button - becomes a pulsing bubble during a call; tapping it while a call is active hangs up */}
<Animated.View style={{ transform: [{ scale: pulseAnim }] }}>
<TouchableOpacity
style={[
styles.voiceButton,
(isConnectingVoice || isCallActive) && styles.voiceButtonConnecting,
isConnectingVoice && styles.voiceButtonConnecting,
isCallActive && styles.voiceButtonActive,
]}
onPress={isCallActive ? maximizeCall : startVoiceCall}
onPress={isCallActive ? endVoiceCall : startVoiceCall}
disabled={isConnectingVoice}
>
{isConnectingVoice ? (
<ActivityIndicator size="small" color={AppColors.primary} />
) : isCallActive ? (
<Ionicons name="call" size={20} color={AppColors.success} />
<View style={styles.callActiveIndicator}>
<Ionicons name="call" size={20} color={AppColors.white} />
</View>
) : (
<Ionicons name="call" size={20} color={AppColors.primary} />
)}
</TouchableOpacity>
</Animated.View>
{/* Call duration badge */}
{isCallActive && (
<View style={styles.callDurationBadge}>
<Text style={styles.callDurationText}>
{Math.floor(callState.callDuration / 60).toString().padStart(2, '0')}:
{(callState.callDuration % 60).toString().padStart(2, '0')}
</Text>
</View>
)}
<TextInput
style={styles.input}
@ -946,15 +844,8 @@ export default function ChatScreen() {
</View>
</KeyboardAvoidingView>
{/* Voice Call Modal */}
<Modal
visible={isCallActive && !callState.isMinimized}
animationType="slide"
presentationStyle="fullScreen"
onRequestClose={minimizeCall}
>
<SafeAreaView style={{ flex: 1, backgroundColor: 'black' }} edges={['top', 'bottom']}>
{callState.token && callState.wsUrl ? (
{/* Invisible LiveKit Room - runs in background during call */}
{isCallActive && callState.token && callState.wsUrl && (
<LiveKitRoom
serverUrl={callState.wsUrl}
token={callState.token}
@ -969,23 +860,13 @@ export default function ChatScreen() {
endVoiceCall();
}}
>
<VoiceCallContent
onHangUp={endVoiceCall}
onMinimize={minimizeCall}
<VoiceCallTranscriptHandler
onTranscript={handleVoiceTranscript}
onDurationUpdate={updateDuration}
beneficiaryName={currentBeneficiary?.name}
/>
</LiveKitRoom>
) : (
<View style={{ flex: 1, justifyContent: 'center', alignItems: 'center' }}>
<ActivityIndicator size="large" color={AppColors.primary} />
<Text style={{ color: 'white', marginTop: 16 }}>Connecting...</Text>
</View>
)}
</SafeAreaView>
</Modal>
</SafeAreaView>
);
}
@ -1138,6 +1019,33 @@ const styles = StyleSheet.create({
borderColor: AppColors.success,
backgroundColor: 'rgba(90, 200, 168, 0.1)',
},
voiceButtonActive: {
backgroundColor: AppColors.success,
borderColor: AppColors.success,
},
callActiveIndicator: {
width: '100%',
height: '100%',
justifyContent: 'center',
alignItems: 'center',
},
callDurationBadge: {
position: 'absolute',
left: 32,
top: -8,
backgroundColor: AppColors.success,
paddingHorizontal: 6,
paddingVertical: 2,
borderRadius: 8,
minWidth: 42,
alignItems: 'center',
},
callDurationText: {
fontSize: 10,
fontWeight: '600',
color: AppColors.white,
fontVariant: ['tabular-nums'],
},
sendButton: {
width: 44,
height: 44,

View File

@ -166,8 +166,8 @@ export default function ProfileScreen() {
<View style={styles.menuCard}>
<MenuItem
icon="server-outline"
title="Deployment ID"
subtitle={deploymentId ? `${deploymentId}${deploymentName ? ` (${deploymentName})` : ''}` : 'Not set (auto)'}
title="Deployment"
subtitle={deploymentId ? (deploymentName || `ID: ${deploymentId}`) : 'Auto'}
onPress={openDeploymentModal}
/>
</View>

View File

@ -6,3 +6,9 @@ id = "CA_Yd3qcuYEVKKE"
[build]
dockerfile = "Dockerfile"
[env]
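# Deepgram for STT and TTS
# NOTE(review): this key is committed in plain text - rotate it and load it from a secret store instead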
DEEPGRAM_API_KEY = "cec33b489b0ba12c4e4f1ea888e887e88fba5848"
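# AssemblyAI key - previously chosen for STT (recognized "dad" vs "dead"), but the agent now uses Deepgram for STT
# NOTE(review): appears unused - confirm and remove; the key is committed in plain text and should be rotated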
ASSEMBLYAI_API_KEY = "42e753b65b6a4360ae4a77ac76961857"

View File

@ -12,6 +12,8 @@ dependencies = [
"livekit-agents[silero]~=1.3",
"livekit-plugins-noise-cancellation~=0.2",
"livekit-plugins-deepgram~=1.0",
# Removed assemblyai - was giving garbage transcriptions
# Deepgram Nova-2 is faster and more accurate
"python-dotenv",
"aiohttp",
]

View File

@ -312,7 +312,14 @@ class WellNuoLLMStream(llm.LLMStream):
def prewarm(proc: JobProcess):
"""Preload VAD model for faster startup."""
proc.userdata["vad"] = silero.VAD.load()
# Increase min_silence_duration so natural pauses are not treated as the end of the user's turn
# Default is 0.55s which is too short - user pauses between words get interpreted as end of speech
# 0.9s gives user more time to continue speaking without being cut off
proc.userdata["vad"] = silero.VAD.load(
min_silence_duration=0.9, # Wait 0.9s of silence before ending speech (default: 0.55)
min_speech_duration=0.05, # Keep low for quick interruption detection (default: 0.05)
activation_threshold=0.4, # Slightly lower for better sensitivity (default: 0.5)
)
async def wait_for_participant_with_metadata(
@ -389,34 +396,39 @@ async def entrypoint(ctx: JobContext):
logger.info(f"Starting Julia AI session in room {ctx.room.name}")
# Wait for participant with metadata (fixes race condition)
# The mobile app sends deploymentId and beneficiaryNamesDict in token metadata
# Wait for participant with metadata - short timeout since metadata arrives immediately if present
# The mobile app sends deploymentId via token metadata
deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata(
ctx, timeout=10.0
ctx, timeout=2.0 # 2 seconds is enough - if metadata exists, it arrives within 0.5s
)
# Log what we're using
# Use deployment_id from metadata, or fall back to default
effective_deployment_id = deployment_id or DEPLOYMENT_ID
logger.info(
f"Using WellNuo ask_wellnuo_ai API with deployment_id: {effective_deployment_id}"
)
if beneficiary_names_dict:
logger.info(f"Beneficiary names dict: {beneficiary_names_dict}")
else:
logger.info("No beneficiary_names_dict provided, using default behavior")
logger.info(f"Using deployment_id={effective_deployment_id} (from_metadata={deployment_id is not None})")
# Deepgram for STT - better accuracy and faster than AssemblyAI
# AssemblyAI was giving garbage like "shambhala balashambal" instead of actual speech
session = AgentSession(
# Deepgram Nova-2 for accurate speech-to-text
stt=deepgram.STT(model="nova-2"),
# Deepgram Nova-2 model for best STT accuracy
stt=deepgram.STT(
model="nova-2-general",
language="en-US",
smart_format=True, # Better punctuation and formatting
no_delay=True, # Faster response for real-time
),
# WellNuo voice_ask API for LLM with dynamic beneficiary data
llm=WellNuoLLM(
deployment_id=deployment_id,
deployment_id=effective_deployment_id,
beneficiary_names_dict=beneficiary_names_dict,
),
# Deepgram Aura Asteria for natural female voice
tts=deepgram.TTS(model="aura-asteria-en"),
# Silero VAD for voice activity detection
# Silero VAD for voice activity detection (prewarmed with tuned settings)
vad=ctx.proc.userdata["vad"],
# INTERRUPTION SETTINGS:
# min_interruption_duration: How long user must speak to trigger interruption (default 0.5s)
# Set to 999.0 to effectively DISABLE interruption - user cannot interrupt the agent
min_interruption_duration=999.0,
)
# Start the session with Julia assistant