Sergei 059bc29b6b WIP: LiveKit voice call integration with Julia AI agent
NOT TESTED ON REAL DEVICE - simulator-only verification

Components:
- LiveKit Cloud agent deployment (julia-agent/julia-ai/)
- React Native LiveKit client (hooks/useLiveKitRoom.ts)
- Voice call screen with audio session management
- WellNuo voice_ask API integration in Python agent (see the API round-trip sketch after this message)

Tech stack:
- LiveKit Cloud for agent hosting
- @livekit/react-native SDK
- Deepgram STT/TTS (via LiveKit Cloud)
- Silero VAD for voice activity detection

Known issues:
- Microphone permissions may need manual testing on real device
- LiveKit audio playback not verified on physical hardware
- Agent greeting audio not confirmed working end-to-end

Next steps:
- Test on physical iOS device
- Verify microphone capture works
- Confirm TTS audio playback
- Test full conversation loop
2026-01-18 20:16:25 -08:00
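Since the end-to-end audio path is still unverified, the WellNuo side can be sanity-checked on its own. The following is a minimal sketch, not part of this commit, that reuses the same endpoint, form fields, and environment variables as the agent below to run one credentials + voice_ask round trip; the file name and the question string are placeholders.

# wellnuo_roundtrip_check.py - hypothetical helper, not part of this commit.
# Reuses the same endpoint and form fields as julia-agent/julia-ai below.
import asyncio
import os
import random

import aiohttp

API_URL = "https://eluxnetworks.net/function/well-api/api"
USER = os.getenv("WELLNUO_USER", "anandk")


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # Step 1: authenticate (the API expects the password in the 'ps' field)
        creds = {
            "function": "credentials",
            "clientId": "001",
            "user_name": USER,
            "ps": os.getenv("WELLNUO_PASSWORD", ""),
            "nonce": str(random.randint(0, 999999)),
        }
        async with session.post(API_URL, data=creds) as resp:
            auth = await resp.json()
        if auth.get("status") != "200 OK":
            raise SystemExit(f"Auth failed: {auth}")

        # Step 2: send a test question through voice_ask
        ask = {
            "function": "voice_ask",
            "clientId": "001",
            "user_name": USER,
            "token": auth["access_token"],
            "question": "How is everything today?",  # placeholder question
            "deployment_id": os.getenv("DEPLOYMENT_ID", "21"),
        }
        async with session.post(API_URL, data=ask) as resp:
            answer = await resp.json()
        print(answer.get("response", {}).get("body", answer))


if __name__ == "__main__":
    asyncio.run(main())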


"""
WellNuo Voice Agent - Julia AI
LiveKit Agents Cloud deployment
Uses WellNuo voice_ask API for LLM responses, Deepgram for STT/TTS
"""
import logging
import os
import random
import aiohttp
from livekit.agents import (
    Agent,
    AgentSession,
    JobContext,
    JobProcess,
    RoomInputOptions,
    WorkerOptions,
    cli,
    llm,
)
from livekit.plugins import deepgram, silero, noise_cancellation

logger = logging.getLogger("julia-ai")

# WellNuo API Configuration
WELLNUO_API_URL = "https://eluxnetworks.net/function/well-api/api"
WELLNUO_USER = os.getenv("WELLNUO_USER", "anandk")
WELLNUO_PASSWORD = os.getenv("WELLNUO_PASSWORD", "anandk_8")

# Hardcoded Ferdinand's deployment_id for testing
DEPLOYMENT_ID = os.getenv("DEPLOYMENT_ID", "21")

# Julia's personality for voice synthesis
JULIA_GREETING = "Hello! I'm Julia, your AI care assistant. How can I help you today?"


class WellNuoLLM(llm.LLM):
    """Custom LLM that uses WellNuo voice_ask API."""

    def __init__(self):
        super().__init__()
        self._token = None
        self._session = None

    async def _ensure_token(self):
        """Get authentication token from WellNuo API."""
        if self._token:
            return self._token

        async with aiohttp.ClientSession() as session:
            # Generate random nonce for request
            nonce = str(random.randint(0, 999999))
            data = {
                "function": "credentials",
                "clientId": "001",
                "user_name": WELLNUO_USER,
                "ps": WELLNUO_PASSWORD,  # API expects 'ps' not 'password'
                "nonce": nonce,
            }
            async with session.post(WELLNUO_API_URL, data=data) as resp:
                result = await resp.json()
                if result.get("status") == "200 OK":
                    self._token = result.get("access_token")
                    logger.info("WellNuo token obtained successfully")
                    return self._token
                else:
                    logger.error(f"Failed to get WellNuo token: {result}")
                    raise Exception("Failed to authenticate with WellNuo API")

    async def chat(
        self,
        *,
        chat_ctx: llm.ChatContext,
        tools: list[llm.FunctionTool] | None = None,
        tool_choice: llm.ToolChoice | None = None,
        parallel_tool_calls: bool | None = None,
        extra_body: dict | None = None,
    ) -> llm.LLMStream:
        """Send user question to WellNuo voice_ask API."""
        # Get the last user message
        user_message = ""
        for msg in reversed(chat_ctx.items):
            if hasattr(msg, 'role') and msg.role == "user":
                if hasattr(msg, 'content'):
                    user_message = msg.content
                break
        if not user_message:
            # Return a default response if no user message
            return WellNuoLLMStream("I'm here to help. What would you like to know?")

        logger.info(f"User question: {user_message}")

        # Get response from WellNuo API
        try:
            token = await self._ensure_token()
            async with aiohttp.ClientSession() as session:
                data = {
                    "function": "voice_ask",
                    "clientId": "001",
                    "user_name": WELLNUO_USER,
                    "token": token,
                    "question": user_message,
                    "deployment_id": DEPLOYMENT_ID,
                }
                async with session.post(WELLNUO_API_URL, data=data) as resp:
                    result = await resp.json()
                    if result.get("ok"):
                        response_body = result.get("response", {}).get("body", "")
                        logger.info(f"WellNuo response: {response_body}")
                        return WellNuoLLMStream(response_body)
                    else:
                        logger.error(f"WellNuo API error: {result}")
                        return WellNuoLLMStream("I'm sorry, I couldn't get that information right now.")
        except Exception as e:
            logger.error(f"Error calling WellNuo API: {e}")
            return WellNuoLLMStream("I'm having trouble connecting. Please try again.")


class WellNuoLLMStream(llm.LLMStream):
    """Stream wrapper for WellNuo API response."""

    def __init__(self, response_text: str):
        super().__init__(
            llm=None,
            chat_ctx=llm.ChatContext(),
            tools=[],
            tool_choice=None,
            parallel_tool_calls=None,
            extra_body=None,
        )
        self._response_text = response_text
        self._sent = False

    async def _run(self):
        """Yield the response as a single chunk."""
        pass

    async def __anext__(self) -> llm.ChatChunk:
        if self._sent:
            raise StopAsyncIteration
        self._sent = True
        return llm.ChatChunk(
            id="wellnuo-response",
            delta=llm.ChoiceDelta(
                role="assistant",
                content=self._response_text,
            ),
        )

    def __aiter__(self):
        return self


def prewarm(proc: JobProcess):
    """Preload VAD model for faster startup."""
    proc.userdata["vad"] = silero.VAD.load()


async def entrypoint(ctx: JobContext):
    """Main Julia AI voice session handler."""
    logger.info(f"Starting Julia AI session in room {ctx.room.name}")
    logger.info(f"Using WellNuo voice_ask API with deployment_id: {DEPLOYMENT_ID}")

    session = AgentSession(
        # Deepgram Nova-2 for accurate speech-to-text
        stt=deepgram.STT(model="nova-2"),
        # WellNuo voice_ask API for LLM
        llm=WellNuoLLM(),
        # Deepgram Aura Asteria for natural female voice
        tts=deepgram.TTS(model="aura-asteria-en"),
        # Silero VAD for voice activity detection
        vad=ctx.proc.userdata["vad"],
    )

    # Start the session with Julia assistant
    await session.start(
        agent=Agent(instructions="You are Julia, a helpful AI care assistant."),
        room=ctx.room,
        room_input_options=RoomInputOptions(
            # Enable noise cancellation
            noise_cancellation=noise_cancellation.BVC(),
        ),
    )

    # Generate initial greeting
    await session.generate_reply(
        instructions="Greet the user warmly as Julia. Briefly introduce yourself as their AI care assistant and ask how you can help them today."
    )


if __name__ == "__main__":
    cli.run_app(
        WorkerOptions(
            entrypoint_fnc=entrypoint,
            prewarm_fnc=prewarm,
        )
    )
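
For local verification before the device test, the agent can be started against LiveKit Cloud through the Agents CLI entry point above (e.g. `python <agent file> dev`, file name assumed), with LIVEKIT_URL, LIVEKIT_API_KEY, LIVEKIT_API_SECRET, WELLNUO_USER and WELLNUO_PASSWORD set, plus DEEPGRAM_API_KEY unless Deepgram is provisioned through LiveKit Cloud. The React Native client (hooks/useLiveKitRoom.ts) also needs a room token to join; token generation is not part of this commit, but a minimal server-side sketch using the LiveKit Python server SDK (livekit-api) could look like the following, with the identity and room name as placeholders.

# Hypothetical token helper for the RN client; not part of this commit.
# Assumes the livekit-api package and the same LiveKit Cloud project as julia-ai.
import os

from livekit import api


def mint_join_token(identity: str, room: str = "julia-call") -> str:
    """Return a JWT the mobile client can pass to useLiveKitRoom to join `room`."""
    token = (
        api.AccessToken(os.environ["LIVEKIT_API_KEY"], os.environ["LIVEKIT_API_SECRET"])
        .with_identity(identity)
        .with_grants(api.VideoGrants(room_join=True, room=room))
    )
    return token.to_jwt()


if __name__ == "__main__":
    print(mint_join_token("test-user"))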