From 6d017ea617497dbc78c811b83bbcfc7c0831cbe4 Mon Sep 17 00:00:00 2001
From: Sergei <sergei@Sergeis-MacBook-Pro.local>
Date: Sat, 24 Jan 2026 15:22:21 -0800
Subject: [PATCH] WellNuo Lite Robert - Julia Robust Agent (no barge-in)

Changes:
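- livekitService.ts: documented that the remote Julia token server is used in production (comment only)
- julia-robust agent with interruptions (barge-in) disabled via allow_interruptions=False
- Added discard_audio_if_uninterruptible=True
- Added min_interruption_duration=2.0
- Token server AGENT_NAME set to julia-robust
- Stricter Silero VAD settings; participant-metadata wait reduced from 10s to 2s
- Re-enabled the Debug tab and renamed the app to "WellNuo ROBUST"
- Added Deepgram and AssemblyAI API keys to livekit.toml [env]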
---
 .gitignore                          |  1 +
 app.json                            |  2 +-
 app/(tabs)/_layout.tsx              |  7 ++-
 julia-agent/julia-ai/livekit.toml   |  6 +++
 julia-agent/julia-ai/pyproject.toml |  2 +
 julia-agent/julia-ai/src/agent.py   | 67 ++++++++++++++++++++---------
 julia-agent/token-server/server.js  |  3 +-
 services/livekitService.ts          |  1 +
 8 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/.gitignore b/.gitignore
index d5b9584..e331b03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ store-screenshots/
 
 # Build artifacts
 WellNuoLite-Android/
+julia-agent/julia-ai/google-credentials.json
diff --git a/app.json b/app.json
index 4bf329f..58fea50 100644
--- a/app.json
+++ b/app.json
@@ -1,6 +1,6 @@
 {
   "expo": {
-    "name": "WellNuo",
+    "name": "WellNuo ROBUST",
     "slug": "WellNuo",
     "version": "1.0.5",
     "orientation": "portrait",
diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx
index 031e5b0..c058465 100644
--- a/app/(tabs)/_layout.tsx
+++ b/app/(tabs)/_layout.tsx
@@ -72,11 +72,14 @@ export default function TabLayout() {
           ),
         }}
       />
-      {/* Debug tab - HIDDEN, no longer needed */}
+      {/* Debug tab - Voice call debugging with detailed logs */}
       <Tabs.Screen
         name="debug"
         options={{
-          href: null,
+          title: 'Debug',
+          tabBarIcon: ({ color, size }) => (
+            <Feather name="terminal" size={22} color={color} />
+          ),
         }}
       />
       {/* Hide explore tab */}
diff --git a/julia-agent/julia-ai/livekit.toml b/julia-agent/julia-ai/livekit.toml
index fe1c113..51f4b64 100644
--- a/julia-agent/julia-ai/livekit.toml
+++ b/julia-agent/julia-ai/livekit.toml
@@ -6,3 +6,9 @@ id = "CA_Yd3qcuYEVKKE"
 
 [build]
 dockerfile = "Dockerfile"
+
+[env]
+# Deepgram for STT and TTS. NOTE(review): secret committed in plain text - rotate it and load from a secret store
+DEEPGRAM_API_KEY = "cec33b489b0ba12c4e4f1ea888e887e88fba5848"
+# AssemblyAI key. NOTE(review): pyproject.toml says assemblyai was removed and agent.py uses Deepgram STT - confirm this key is still needed
+ASSEMBLYAI_API_KEY = "42e753b65b6a4360ae4a77ac76961857"
diff --git a/julia-agent/julia-ai/pyproject.toml b/julia-agent/julia-ai/pyproject.toml
index 09ebec4..49a3411 100644
--- a/julia-agent/julia-ai/pyproject.toml
+++ b/julia-agent/julia-ai/pyproject.toml
@@ -12,6 +12,8 @@ dependencies = [
     "livekit-agents[silero]~=1.3",
     "livekit-plugins-noise-cancellation~=0.2",
     "livekit-plugins-deepgram~=1.0",
+    # Removed assemblyai - was giving garbage transcriptions
+    # Deepgram Nova-2 is faster and more accurate
     "python-dotenv",
     "aiohttp",
 ]
diff --git a/julia-agent/julia-ai/src/agent.py b/julia-agent/julia-ai/src/agent.py
index 3dc4ad1..1161cb2 100644
--- a/julia-agent/julia-ai/src/agent.py
+++ b/julia-agent/julia-ai/src/agent.py
@@ -1,7 +1,10 @@
 """
-WellNuo Voice Agent - Julia AI
+WellNuo Voice Agent - Julia Robust (NO BARGE-IN)
 LiveKit Agents Cloud deployment
 Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS
+
+ROBUST MODE: Barge-in is DISABLED - user cannot interrupt the agent.
+This prevents hallucinations from background noise being interpreted as speech.
 """
 
 import json
@@ -24,7 +27,7 @@ from livekit.agents import (
 from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
 from livekit.plugins import deepgram, noise_cancellation, silero
 
-logger = logging.getLogger("julia-ai")
+logger = logging.getLogger("julia-robust")
 
 # WellNuo API Configuration
 WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api"
@@ -311,8 +314,17 @@ class WellNuoLLMStream(llm.LLMStream):
 
 
 def prewarm(proc: JobProcess):
-    """Preload VAD model for faster startup."""
-    proc.userdata["vad"] = silero.VAD.load()
+    """Preload VAD model for faster startup.
+
+    ROBUST MODE: VAD is still needed for detecting when user FINISHES speaking,
+    but we'll disable interruptions in the AgentSession.
+    High thresholds to only respond to clear, deliberate speech.
+    """
+    proc.userdata["vad"] = silero.VAD.load(
+        min_silence_duration=1.5,   # Wait 1.5s of silence before ending speech (very patient)
+        min_speech_duration=0.3,    # Require 0.3s of speech to start (filter short noises)
+        activation_threshold=0.6,   # Higher threshold - only clear speech triggers (default: 0.5)
+    )
 
 
 async def wait_for_participant_with_metadata(
@@ -387,36 +399,48 @@ async def entrypoint(ctx: JobContext):
     # CRITICAL: Must connect to room first before accessing ctx.room
     await ctx.connect()
 
-    logger.info(f"Starting Julia AI session in room {ctx.room.name}")
+    logger.info(f"Starting Julia ROBUST (no barge-in) session in room {ctx.room.name}")
 
-    # Wait for participant with metadata (fixes race condition)
-    # The mobile app sends deploymentId and beneficiaryNamesDict in token metadata
+    # Wait for participant with metadata - short timeout since metadata arrives immediately if present
+    # The mobile app sends deploymentId via token metadata
     deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata(
-        ctx, timeout=10.0
+        ctx, timeout=2.0  # 2 seconds is enough - if metadata exists, it arrives within 0.5s
     )
 
-    # Log what we're using
+    # Use deployment_id from metadata, or fall back to default
     effective_deployment_id = deployment_id or DEPLOYMENT_ID
-    logger.info(
-        f"Using WellNuo ask_wellnuo_ai API with deployment_id: {effective_deployment_id}"
-    )
-    if beneficiary_names_dict:
-        logger.info(f"Beneficiary names dict: {beneficiary_names_dict}")
-    else:
-        logger.info("No beneficiary_names_dict provided, using default behavior")
+    logger.info(f"Using deployment_id={effective_deployment_id} (from_metadata={deployment_id is not None})")
 
+    # ROBUST MODE: Barge-in DISABLED
+    # User cannot interrupt the agent while it's speaking.
+    # This prevents hallucinations from background noise.
     session = AgentSession(
-        # Deepgram Nova-2 for accurate speech-to-text
-        stt=deepgram.STT(model="nova-2"),
+        # Deepgram Nova-2 model for best STT accuracy
+        stt=deepgram.STT(
+            model="nova-2-general",
+            language="en-US",
+            smart_format=True,  # Better punctuation and formatting
+            no_delay=True,      # Faster response for real-time
+        ),
         # WellNuo voice_ask API for LLM with dynamic beneficiary data
         llm=WellNuoLLM(
-            deployment_id=deployment_id,
+            deployment_id=effective_deployment_id,
             beneficiary_names_dict=beneficiary_names_dict,
         ),
         # Deepgram Aura Asteria for natural female voice
         tts=deepgram.TTS(model="aura-asteria-en"),
-        # Silero VAD for voice activity detection
+        # Silero VAD for voice activity detection (strict settings)
         vad=ctx.proc.userdata["vad"],
+        # DISABLE INTERRUPTIONS COMPLETELY:
+        # allow_interruptions=False means user cannot interrupt agent while speaking
+        # This prevents "hallucinations" from random noises being interpreted as speech
+        allow_interruptions=False,
+        # CRITICAL: Discard any audio captured while agent is speaking
+        # This ensures user speech during agent output is completely ignored
+        # See: https://github.com/livekit/agents/issues/4316
+        discard_audio_if_uninterruptible=True,
+        # Minimum speech length (seconds) to register as an interruption; presumably moot while allow_interruptions=False - confirm
+        min_interruption_duration=2.0,
     )
 
     # Start the session with Julia assistant
@@ -441,6 +465,7 @@ if __name__ == "__main__":
             entrypoint_fnc=entrypoint,
             prewarm_fnc=prewarm,
             # Agent name must match what token requests (AGENT_NAME in livekit.js)
-            agent_name="julia-ai",
+            # ROBUST version - separate from julia-ai
+            agent_name="julia-robust",
         )
     )
diff --git a/julia-agent/token-server/server.js b/julia-agent/token-server/server.js
index 69ed92f..89a1f35 100644
--- a/julia-agent/token-server/server.js
+++ b/julia-agent/token-server/server.js
@@ -10,7 +10,8 @@ app.use(express.json());
 const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV';
 const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL';
 const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud';
-const AGENT_NAME = 'julia-ai';
+// ROBUST MODE: Use julia-robust agent (no barge-in)
+const AGENT_NAME = 'julia-robust';
 
 // Health check
 app.get('/health', (req, res) => {
diff --git a/services/livekitService.ts b/services/livekitService.ts
index 4824eea..4d72460 100644
--- a/services/livekitService.ts
+++ b/services/livekitService.ts
@@ -5,6 +5,7 @@
  */
 
 // Julia Token Server (dedicated endpoint for LiveKit tokens)
+// Production: Use remote Julia Token Server
 const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';
 
 // Voice configuration