WellNuo Lite Robert - Julia Robust Agent (no barge-in)
Changes:
- Updated livekitService.ts to use the remote token server
- Added the julia-robust agent with interruptions (barge-in) disabled
- Added discard_audio_if_uninterruptible=True
- Added min_interruption_duration=2.0
- Configured the token server for the julia-robust agent
This commit is contained in:
parent
a578ec8081
commit
6d017ea617
1
.gitignore
vendored
1
.gitignore
vendored
@ -54,3 +54,4 @@ store-screenshots/
|
|||||||
|
|
||||||
# Build artifacts
|
# Build artifacts
|
||||||
WellNuoLite-Android/
|
WellNuoLite-Android/
|
||||||
|
julia-agent/julia-ai/google-credentials.json
|
||||||
|
|||||||
2
app.json
2
app.json
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"expo": {
|
"expo": {
|
||||||
"name": "WellNuo",
|
"name": "WellNuo ROBUST",
|
||||||
"slug": "WellNuo",
|
"slug": "WellNuo",
|
||||||
"version": "1.0.5",
|
"version": "1.0.5",
|
||||||
"orientation": "portrait",
|
"orientation": "portrait",
|
||||||
|
|||||||
@ -72,11 +72,14 @@ export default function TabLayout() {
|
|||||||
),
|
),
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
{/* Debug tab - HIDDEN, no longer needed */}
|
{/* Debug tab - Voice call debugging with detailed logs */}
|
||||||
<Tabs.Screen
|
<Tabs.Screen
|
||||||
name="debug"
|
name="debug"
|
||||||
options={{
|
options={{
|
||||||
href: null,
|
title: 'Debug',
|
||||||
|
tabBarIcon: ({ color, size }) => (
|
||||||
|
<Feather name="terminal" size={22} color={color} />
|
||||||
|
),
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
{/* Hide explore tab */}
|
{/* Hide explore tab */}
|
||||||
|
|||||||
@ -6,3 +6,9 @@ id = "CA_Yd3qcuYEVKKE"
|
|||||||
|
|
||||||
[build]
|
[build]
|
||||||
dockerfile = "Dockerfile"
|
dockerfile = "Dockerfile"
|
||||||
|
|
||||||
|
[env]
|
||||||
|
# Deepgram for STT and TTS
|
||||||
|
DEEPGRAM_API_KEY = "cec33b489b0ba12c4e4f1ea888e887e88fba5848"
|
||||||
|
# AssemblyAI for STT (best accuracy - correctly recognizes "dad" vs "dead")
|
||||||
|
ASSEMBLYAI_API_KEY = "42e753b65b6a4360ae4a77ac76961857"
|
||||||
|
|||||||
@ -12,6 +12,8 @@ dependencies = [
|
|||||||
"livekit-agents[silero]~=1.3",
|
"livekit-agents[silero]~=1.3",
|
||||||
"livekit-plugins-noise-cancellation~=0.2",
|
"livekit-plugins-noise-cancellation~=0.2",
|
||||||
"livekit-plugins-deepgram~=1.0",
|
"livekit-plugins-deepgram~=1.0",
|
||||||
|
# Removed assemblyai - was giving garbage transcriptions
|
||||||
|
# Deepgram Nova-2 is faster and more accurate
|
||||||
"python-dotenv",
|
"python-dotenv",
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,7 +1,10 @@
|
|||||||
"""
|
"""
|
||||||
WellNuo Voice Agent - Julia AI
|
WellNuo Voice Agent - Julia Robust (NO BARGE-IN)
|
||||||
LiveKit Agents Cloud deployment
|
LiveKit Agents Cloud deployment
|
||||||
Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS
|
Uses WellNuo ask_wellnuo_ai API for LLM responses, Deepgram for STT/TTS
|
||||||
|
|
||||||
|
ROBUST MODE: Barge-in is DISABLED - user cannot interrupt the agent.
|
||||||
|
This prevents hallucinations from background noise being interpreted as speech.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@ -24,7 +27,7 @@ from livekit.agents import (
|
|||||||
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
|
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
|
||||||
from livekit.plugins import deepgram, noise_cancellation, silero
|
from livekit.plugins import deepgram, noise_cancellation, silero
|
||||||
|
|
||||||
logger = logging.getLogger("julia-ai")
|
logger = logging.getLogger("julia-robust")
|
||||||
|
|
||||||
# WellNuo API Configuration
|
# WellNuo API Configuration
|
||||||
WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api"
|
WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api"
|
||||||
@ -311,8 +314,17 @@ class WellNuoLLMStream(llm.LLMStream):
|
|||||||
|
|
||||||
|
|
||||||
def prewarm(proc: JobProcess):
|
def prewarm(proc: JobProcess):
|
||||||
"""Preload VAD model for faster startup."""
|
"""Preload VAD model for faster startup.
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
|
||||||
|
ROBUST MODE: VAD is still needed for detecting when user FINISHES speaking,
|
||||||
|
but we'll disable interruptions in the AgentSession.
|
||||||
|
High thresholds to only respond to clear, deliberate speech.
|
||||||
|
"""
|
||||||
|
proc.userdata["vad"] = silero.VAD.load(
|
||||||
|
min_silence_duration=1.5, # Wait 1.5s of silence before ending speech (very patient)
|
||||||
|
min_speech_duration=0.3, # Require 0.3s of speech to start (filter short noises)
|
||||||
|
activation_threshold=0.6, # Higher threshold - only clear speech triggers (default: 0.5)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def wait_for_participant_with_metadata(
|
async def wait_for_participant_with_metadata(
|
||||||
@ -387,36 +399,48 @@ async def entrypoint(ctx: JobContext):
|
|||||||
# CRITICAL: Must connect to room first before accessing ctx.room
|
# CRITICAL: Must connect to room first before accessing ctx.room
|
||||||
await ctx.connect()
|
await ctx.connect()
|
||||||
|
|
||||||
logger.info(f"Starting Julia AI session in room {ctx.room.name}")
|
logger.info(f"Starting Julia ROBUST (no barge-in) session in room {ctx.room.name}")
|
||||||
|
|
||||||
# Wait for participant with metadata (fixes race condition)
|
# Wait for participant with metadata - short timeout since metadata arrives immediately if present
|
||||||
# The mobile app sends deploymentId and beneficiaryNamesDict in token metadata
|
# The mobile app sends deploymentId via token metadata
|
||||||
deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata(
|
deployment_id, beneficiary_names_dict = await wait_for_participant_with_metadata(
|
||||||
ctx, timeout=10.0
|
ctx, timeout=2.0 # 2 seconds is enough - if metadata exists, it arrives within 0.5s
|
||||||
)
|
)
|
||||||
|
|
||||||
# Log what we're using
|
# Use deployment_id from metadata, or fall back to default
|
||||||
effective_deployment_id = deployment_id or DEPLOYMENT_ID
|
effective_deployment_id = deployment_id or DEPLOYMENT_ID
|
||||||
logger.info(
|
logger.info(f"Using deployment_id={effective_deployment_id} (from_metadata={deployment_id is not None})")
|
||||||
f"Using WellNuo ask_wellnuo_ai API with deployment_id: {effective_deployment_id}"
|
|
||||||
)
|
|
||||||
if beneficiary_names_dict:
|
|
||||||
logger.info(f"Beneficiary names dict: {beneficiary_names_dict}")
|
|
||||||
else:
|
|
||||||
logger.info("No beneficiary_names_dict provided, using default behavior")
|
|
||||||
|
|
||||||
|
# ROBUST MODE: Barge-in DISABLED
|
||||||
|
# User cannot interrupt the agent while it's speaking.
|
||||||
|
# This prevents hallucinations from background noise.
|
||||||
session = AgentSession(
|
session = AgentSession(
|
||||||
# Deepgram Nova-2 for accurate speech-to-text
|
# Deepgram Nova-2 model for best STT accuracy
|
||||||
stt=deepgram.STT(model="nova-2"),
|
stt=deepgram.STT(
|
||||||
|
model="nova-2-general",
|
||||||
|
language="en-US",
|
||||||
|
smart_format=True, # Better punctuation and formatting
|
||||||
|
no_delay=True, # Faster response for real-time
|
||||||
|
),
|
||||||
# WellNuo voice_ask API for LLM with dynamic beneficiary data
|
# WellNuo voice_ask API for LLM with dynamic beneficiary data
|
||||||
llm=WellNuoLLM(
|
llm=WellNuoLLM(
|
||||||
deployment_id=deployment_id,
|
deployment_id=effective_deployment_id,
|
||||||
beneficiary_names_dict=beneficiary_names_dict,
|
beneficiary_names_dict=beneficiary_names_dict,
|
||||||
),
|
),
|
||||||
# Deepgram Aura Asteria for natural female voice
|
# Deepgram Aura Asteria for natural female voice
|
||||||
tts=deepgram.TTS(model="aura-asteria-en"),
|
tts=deepgram.TTS(model="aura-asteria-en"),
|
||||||
# Silero VAD for voice activity detection
|
# Silero VAD for voice activity detection (strict settings)
|
||||||
vad=ctx.proc.userdata["vad"],
|
vad=ctx.proc.userdata["vad"],
|
||||||
|
# DISABLE INTERRUPTIONS COMPLETELY:
|
||||||
|
# allow_interruptions=False means user cannot interrupt agent while speaking
|
||||||
|
# This prevents "hallucinations" from random noises being interpreted as speech
|
||||||
|
allow_interruptions=False,
|
||||||
|
# CRITICAL: Discard any audio captured while agent is speaking
|
||||||
|
# This ensures user speech during agent output is completely ignored
|
||||||
|
# See: https://github.com/livekit/agents/issues/4316
|
||||||
|
discard_audio_if_uninterruptible=True,
|
||||||
|
# Minimum speech duration (seconds) that counts as an interruption (filters out short noises)
|
||||||
|
min_interruption_duration=2.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Start the session with Julia assistant
|
# Start the session with Julia assistant
|
||||||
@ -441,6 +465,7 @@ if __name__ == "__main__":
|
|||||||
entrypoint_fnc=entrypoint,
|
entrypoint_fnc=entrypoint,
|
||||||
prewarm_fnc=prewarm,
|
prewarm_fnc=prewarm,
|
||||||
# Agent name must match what token requests (AGENT_NAME in livekit.js)
|
# Agent name must match what token requests (AGENT_NAME in livekit.js)
|
||||||
agent_name="julia-ai",
|
# ROBUST version - separate from julia-ai
|
||||||
|
agent_name="julia-robust",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -10,7 +10,8 @@ app.use(express.json());
|
|||||||
const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV';
|
const LIVEKIT_API_KEY = process.env.LIVEKIT_API_KEY || 'APIEivUcPW3WSrV';
|
||||||
const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL';
|
const LIVEKIT_API_SECRET = process.env.LIVEKIT_API_SECRET || 'A65mc5KUKE0VGdZNaMRwe6uJpA9ZQPAxS66akZTOfmL';
|
||||||
const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud';
|
const LIVEKIT_URL = 'wss://live-kit-demo-70txlh6a.livekit.cloud';
|
||||||
const AGENT_NAME = 'julia-ai';
|
// ROBUST MODE: Use julia-robust agent (no barge-in)
|
||||||
|
const AGENT_NAME = 'julia-robust';
|
||||||
|
|
||||||
// Health check
|
// Health check
|
||||||
app.get('/health', (req, res) => {
|
app.get('/health', (req, res) => {
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// Julia Token Server (dedicated endpoint for LiveKit tokens)
|
// Julia Token Server (dedicated endpoint for LiveKit tokens)
|
||||||
|
// Production: Use remote Julia Token Server
|
||||||
const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';
|
const JULIA_TOKEN_SERVER = 'https://wellnuo.smartlaunchhub.com/julia';
|
||||||
|
|
||||||
// Voice configuration
|
// Voice configuration
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user