WellNuo Lite Robert - Julia Robust Agent (no barge-in)

Changes:
- Updated livekitService.ts to use remote token server
- Julia-robust agent with disabled interruptions
- Added discard_audio_if_uninterruptible=True
- Added min_interruption_duration=2.0
- Token server configured for julia-robust agent
This commit is contained in:
Sergei 2026-01-24 15:22:21 -08:00
parent a578ec8081
commit 6d017ea617
8 changed files with 64 additions and 25 deletions

1
.gitignore vendored
View File

@ -54,3 +54,4 @@ store-screenshots/
# Build artifacts # Build artifacts
WellNuoLite-Android/ WellNuoLite-Android/
julia-agent/julia-ai/google-credentials.json

View File

@ -1,6 +1,6 @@
{ {
"expo": { "expo": {
"name": "WellNuo", "name": "WellNuo ROBUST",
"slug": "WellNuo", "slug": "WellNuo",
"version": "1.0.5", "version": "1.0.5",
"orientation": "portrait", "orientation": "portrait",

View File

@ -72,11 +72,14 @@ export default function TabLayout() {
), ),
}} }}
/> />
{/* Debug tab - HIDDEN, no longer needed */} {/* Debug tab - Voice call debugging with detailed logs */}
<Tabs.Screen <Tabs.Screen
name="debug" name="debug"
options={{ options={{
href: null, title: 'Debug',
tabBarIcon: ({ color, size }) => (
<Feather name="terminal" size={22} color={color} />
),
}} }}
/> />
{/* Hide explore tab */} {/* Hide explore tab */}

View File

@ -6,3 +6,9 @@ id = "CA_Yd3qcuYEVKKE"
[build] [build]
dockerfile = "Dockerfile" dockerfile = "Dockerfile"
[env]
# Deepgram for STT and TTS
DEEPGRAM_API_KEY = "cec33b489b0ba12c4e4f1ea888e887e88fba5848"
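# AssemblyAI for STT (originally chosen for accuracy - correctly recognizes "dad" vs "dead"); NOTE: the agent now uses Deepgram for STT and the assemblyai plugin was removed from dependencies - confirm this key is still needed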
ASSEMBLYAI_API_KEY = "42e753b65b6a4360ae4a77ac76961857"

View File

@ -12,6 +12,8 @@ dependencies = [
"livekit-agents[silero]~=1.3", "livekit-agents[silero]~=1.3",
"livekit-plugins-noise-cancellation~=0.2", "livekit-plugins-noise-cancellation~=0.2",
"livekit-plugins-deepgram~=1.0", "livekit-plugins-deepgram~=1.0",
# Removed assemblyai - was giving garbage transcriptions
# Deepgram Nova-2 is faster and more accurate
"python-dotenv", "python-dotenv",
"aiohttp", "aiohttp",
] ]

View File

@ -1,7 +1,10 @@
""" """
WellNuo Voice Agent - Julia AI WellNuo Voice Agent - Julia Robust (NO BARGE-IN)
LiveKit Agents Cloud deployment LiveKit Agents Cloud deployment
Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS
ROBUST MODE: Barge-in is DISABLED - user cannot interrupt the agent.
This prevents hallucinations from background noise being interpreted as speech.
""" """
import json import json
@ -24,7 +27,7 @@ from livekit.agents import (
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
from livekit.plugins import deepgram, noise_cancellation, silero from livekit.plugins import deepgram, noise_cancellation, silero
logger = logging.getLogger("julia-ai") logger = logging.getLogger("julia-robust")
# WellNuo API Configuration # WellNuo API Configuration
WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api" WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api"
@ -311,8 +314,17 @@ class WellNuoLLMStream(llm.LLMStream):
def prewarm(proc: JobProcess): def prewarm(proc: JobProcess):
"""Preload VAD model for faster startup.""" """Preload VAD model for faster startup.
proc.userdata["vad"] = silero.VAD.load()
ROBUST MODE: VAD is still needed for detecting when user FINISHES speaking,
but we'll disable interruptions in the AgentSession.
High thresholds to only respond to clear, deliberate speech.
"""
proc.userdata["vad"] = silero.VAD.load(
min_silence_duration=1.5, # Wait 1.5s of silence before ending speech (very patient)
min_speech_duration=0.3, # Require 0.3s of speech to start (filter short noises)
activation_threshold=0.6, # Higher threshold - only clear speech triggers (default: 0.5)
)
async def wait_for_participant_with_metadata( async def wait_for_participant_with_metadata(
@ -387,36 +399,48 @@ async def entrypoint(ctx: JobContext):
# CRITICAL: Must connect to room first before accessing ctx.room # CRITICAL: Must connect to room first before accessing ctx.room
await ctx.connect() await ctx.connect()
logger.info(f"Starting Julia AI session in room {ctx.room.name}") logger.info(f"Starting Julia ROBUST (no barge-in) session in room {ctx.room.name}")
# Wait for participant with metadata (fixes race condition) # Wait for participant with metadata - short timeout since metadata arrives immediately if present
# The mobile app sends deploymentId and beneficiaryNamesDict in token metadata # The mobile app sends deploymentId via token metadata
deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata( deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata(
ctx, timeout=10.0 ctx, timeout=2.0 # 2 seconds is enough - if metadata exists, it arrives within 0.5s
) )
# Log what we're using # Use deployment_id from metadata, or fall back to default
effective_deployment_id = deployment_id or DEPLOYMENT_ID effective_deployment_id = deployment_id or DEPLOYMENT_ID
logger.info( logger.info(f"Using deployment_id={effective_deployment_id} (from_metadata={deployment_id is not None})")
f"Using WellNuo ask_wellnuo_ai API with deployment_id: {effective_deployment_id}"
)
if beneficiary_names_dict:
logger.info(f"Beneficiary names dict: {beneficiary_names_dict}")
else:
logger.info("No beneficiary_names_dict provided, using default behavior")
# ROBUST MODE: Barge-in DISABLED
# User cannot interrupt the agent while it's speaking.
# This prevents hallucinations from background noise.
session = AgentSession( session = AgentSession(
# Deepgram Nova-2 for accurate speech-to-text # Deepgram Nova-2 model for best STT accuracy
stt=deepgram.STT(model="nova-2"), stt=deepgram.STT(
model="nova-2-general",
language="en-US",
smart_format=True, # Better punctuation and formatting
no_delay=True, # Faster response for real-time
),
# WellNuo voice_ask API for LLM with dynamic beneficiary data # WellNuo voice_ask API for LLM with dynamic beneficiary data
llm=WellNuoLLM( llm=WellNuoLLM(
deployment_id=deployment_id, deployment_id=effective_deployment_id,
beneficiary_names_dict=beneficiary_names_dict, beneficiary_names_dict=beneficiary_names_dict,
), ),
# Deepgram Aura Asteria for natural female voice # Deepgram Aura Asteria for natural female voice
tts=deepgram.TTS(model="aura-asteria-en"), tts=deepgram.TTS(model="aura-asteria-en"),
# Silero VAD for voice activity detection # Silero VAD for voice activity detection (strict settings)
vad=ctx.proc.userdata["vad"], vad=ctx.proc.userdata["vad"],
# DISABLE INTERRUPTIONS COMPLETELY:
# allow_interruptions=False means user cannot interrupt agent while speaking
# This prevents "hallucinations" from random noises being interpreted as speech
allow_interruptions=False,
# CRITICAL: Discard any audio captured while agent is speaking
# This ensures user speech during agent output is completely ignored
# See: https://github.com/livekit/agents/issues/4316
discard_audio_if_uninterruptible=True,
# Minimum speech length (seconds) that registers as an interruption; has little effect while allow_interruptions=False
min_interruption_duration=2.0,
) )
# Start the session with Julia assistant # Start the session with Julia assistant
@ -441,6 +465,7 @@ if __name__ == "__main__":
entrypoint_fnc=entrypoint, entrypoint_fnc=entrypoint,
prewarm_fnc=prewarm, prewarm_fnc=prewarm,
# Agent name must match what token requests (AGENT_NAME in livekit.js) # Agent name must match what token requests (AGENT_NAME in livekit.js)
agent_name="julia-ai", # ROBUST version - separate from julia-ai
agent_name="julia-robust",
) )
) )

View File

@ -10,7 +10,8 @@ app.use(express.json());
const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV'; const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV';
const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL'; const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL';
const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud'; const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud';
const AGENT_NAME = 'julia-ai'; // ROBUST MODE: Use julia-robust agent (no barge-in)
const AGENT_NAME = 'julia-robust';
// Health check // Health check
app.get('/health', (req, res) => { app.get('/health', (req, res) => {

View File

@ -5,6 +5,7 @@
*/ */
// Julia Token Server (dedicated endpoint for LiveKit tokens) // Julia Token Server (dedicated endpoint for LiveKit tokens)
// Production: Use remote Julia Token Server
const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia'; const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';
// Voice configuration // Voice configuration