From 6d017ea617497dbc78c811b83bbcfc7c0831cbe4 Mon Sep 17 00:00:00 2001 From: Sergei Date: Sat, 24 Jan 2026 15:22:21 -0800 Subject: [PATCH] WellNuo Lite Robert - Julia Robust Agent (no barge-in) Changes: - Updated livekitService.ts to use remote token server - Julia-robust agent with disabled interruptions - Added discard_audio_if_uninterruptible=True - Added min_interruption_duration=2.0 - Token server configured for julia-robust agent --- .gitignore | 1 + app.json | 2 +- app/(tabs)/_layout.tsx | 7 ++- julia-agent/julia-ai/livekit.toml | 6 +++ julia-agent/julia-ai/pyproject.toml | 2 + julia-agent/julia-ai/src/agent.py | 67 ++++++++++++++++++++--------- julia-agent/token-server/server.js | 3 +- services/livekitService.ts | 1 + 8 files changed, 64 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index d5b9584..e331b03 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ store-screenshots/ # Build artifacts WellNuoLite-Android/ +julia-agent/julia-ai/google-credentials.json diff --git a/app.json b/app.json index 4bf329f..58fea50 100644 --- a/app.json +++ b/app.json @@ -1,6 +1,6 @@ { "expo": { - "name": "WellNuo", + "name": "WellNuo ROBUST", "slug": "WellNuo", "version": "1.0.5", "orientation": "portrait", diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx index 031e5b0..c058465 100644 --- a/app/(tabs)/_layout.tsx +++ b/app/(tabs)/_layout.tsx @@ -72,11 +72,14 @@ export default function TabLayout() { ), }} /> - {/* Debug tab - HIDDEN, no longer needed */} + {/* Debug tab - Voice call debugging with detailed logs */} ( + + ), }} /> {/* Hide explore tab */} diff --git a/julia-agent/julia-ai/livekit.toml b/julia-agent/julia-ai/livekit.toml index fe1c113..51f4b64 100644 --- a/julia-agent/julia-ai/livekit.toml +++ b/julia-agent/julia-ai/livekit.toml @@ -6,3 +6,9 @@ id = "CA_Yd3qcuYEVKKE" [build] dockerfile = "Dockerfile" + +[env] +# Deepgram for TTS +DEEPGRAM_API_KEY = "cec33b489b0ba12c4e4f1ea888e887e88fba5848" +# AssemblyAI for STT (best accuracy - correctly recognizes "dad" vs "dead") +ASSEMBLYAI_API_KEY = "42e753b65b6a4360ae4a77ac76961857" diff --git a/julia-agent/julia-ai/pyproject.toml b/julia-agent/julia-ai/pyproject.toml index 09ebec4..49a3411 100644 --- a/julia-agent/julia-ai/pyproject.toml +++ b/julia-agent/julia-ai/pyproject.toml @@ -12,6 +12,8 @@ dependencies = [ "livekit-agents[silero]~=1.3", "livekit-plugins-noise-cancellation~=0.2", "livekit-plugins-deepgram~=1.0", + # Removed assemblyai - was giving garbage transcriptions + # Deepgram Nova-2 is faster and more accurate "python-dotenv", "aiohttp", ] diff --git a/julia-agent/julia-ai/src/agent.py b/julia-agent/julia-ai/src/agent.py index 3dc4ad1..1161cb2 100644 --- a/julia-agent/julia-ai/src/agent.py +++ b/julia-agent/julia-ai/src/agent.py @@ -1,7 +1,10 @@ """ -WellNuo Voice Agent - Julia AI +WellNuo Voice Agent - Julia Robust (NO BARGE-IN) LiveKit Agents Cloud deployment Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS + +ROBUST MODE: Barge-in is DISABLED - user cannot interrupt the agent. +This prevents hallucinations from background noise being interpreted as speech. """ import json @@ -24,7 +27,7 @@ from livekit.agents import ( from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions from livekit.plugins import deepgram, noise_cancellation, silero -logger = logging.getLogger("julia-ai") +logger = logging.getLogger("julia-robust") # WellNuo API Configuration WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api" @@ -311,8 +314,17 @@ class WellNuoLLMStream(llm.LLMStream): def prewarm(proc: JobProcess): - """Preload VAD model for faster startup.""" - proc.userdata["vad"] = silero.VAD.load() + """Preload VAD model for faster startup. + + ROBUST MODE: VAD is still needed for detecting when user FINISHES speaking, + but we'll disable interruptions in the AgentSession. + High thresholds to only respond to clear, deliberate speech. + """ + proc.userdata["vad"] = silero.VAD.load( + min_silence_duration=1.5, # Wait 1.5s of silence before ending speech (very patient) + min_speech_duration=0.3, # Require 0.3s of speech to start (filter short noises) + activation_threshold=0.6, # Higher threshold - only clear speech triggers (default: 0.5) + ) async def wait_for_participant_with_metadata( @@ -387,36 +399,48 @@ async def entrypoint(ctx: JobContext): # CRITICAL: Must connect to room first before accessing ctx.room await ctx.connect() - logger.info(f"Starting Julia AI session in room {ctx.room.name}") + logger.info(f"Starting Julia ROBUST (no barge-in) session in room {ctx.room.name}") - # Wait for participant with metadata (fixes race condition) - # The mobile app sends deploymentId and beneficiaryNamesDict in token metadata + # Wait for participant with metadata - short timeout since metadata arrives immediately if present + # The mobile app sends deploymentId via token metadata deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata( - ctx, timeout=10.0 + ctx, timeout=2.0 # 2 seconds is enough - if metadata exists, it arrives within 0.5s ) - # Log what we're using + # Use deployment_id from metadata, or fall back to default effective_deployment_id = deployment_id or DEPLOYMENT_ID - logger.info( - f"Using WellNuo ask_wellnuo_ai API with deployment_id: {effective_deployment_id}" - ) - if beneficiary_names_dict: - logger.info(f"Beneficiary names dict: {beneficiary_names_dict}") - else: - logger.info("No beneficiary_names_dict provided, using default behavior") + logger.info(f"Using deployment_id={effective_deployment_id} (from_metadata={deployment_id is not None})") + # ROBUST MODE: Barge-in DISABLED + # User cannot interrupt the agent while it's speaking. + # This prevents hallucinations from background noise. session = AgentSession( - # Deepgram Nova-2 for accurate speech-to-text - stt=deepgram.STT(model="nova-2"), + # Deepgram Nova-2 model for best STT accuracy + stt=deepgram.STT( + model="nova-2-general", + language="en-US", + smart_format=True, # Better punctuation and formatting + no_delay=True, # Faster response for real-time + ), # WellNuo voice_ask API for LLM with dynamic beneficiary data llm=WellNuoLLM( - deployment_id=deployment_id, + deployment_id=effective_deployment_id, beneficiary_names_dict=beneficiary_names_dict, ), # Deepgram Aura Asteria for natural female voice tts=deepgram.TTS(model="aura-asteria-en"), - # Silero VAD for voice activity detection + # Silero VAD for voice activity detection (strict settings) vad=ctx.proc.userdata["vad"], + # DISABLE INTERRUPTIONS COMPLETELY: + # allow_interruptions=False means user cannot interrupt agent while speaking + # This prevents "hallucinations" from random noises being interpreted as speech + allow_interruptions=False, + # CRITICAL: Discard any audio captured while agent is speaking + # This ensures user speech during agent output is completely ignored + # See: https://github.com/livekit/agents/issues/4316 + discard_audio_if_uninterruptible=True, + # Require longer speech before processing (filter out short noises) + min_interruption_duration=2.0, ) # Start the session with Julia assistant @@ -441,6 +465,7 @@ if __name__ == "__main__": entrypoint_fnc=entrypoint, prewarm_fnc=prewarm, # Agent name must match what token requests (AGENT_NAME in livekit.js) - agent_name="julia-ai", + # ROBUST version - separate from julia-ai + agent_name="julia-robust", ) ) diff --git a/julia-agent/token-server/server.js b/julia-agent/token-server/server.js index 69ed92f..89a1f35 100644 --- a/julia-agent/token-server/server.js +++ b/julia-agent/token-server/server.js @@ -10,7 +10,8 @@ app.use(express.json()); const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV'; const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL'; const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud'; -const AGENT_NAME = 'julia-ai'; +// ROBUST MODE: Use julia-robust agent (no barge-in) +const AGENT_NAME = 'julia-robust'; // Health check app.get('/health', (req, res) => { diff --git a/services/livekitService.ts b/services/livekitService.ts index 4824eea..4d72460 100644 --- a/services/livekitService.ts +++ b/services/livekitService.ts @@ -5,6 +5,7 @@ */ // Julia Token Server (dedicated endpoint for LiveKit tokens) +// Production: Use remote Julia Token Server const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia'; // Voice configuration