Remove LiveKit SDK and related code
- Remove @livekit/react-native-expo-plugin from app.json
- Remove @config-plugins/react-native-webrtc plugin
- Delete utils/audioSession.ts (depended on LiveKit AudioSession)
- Update VoiceCallContext.tsx comments
- Update callManager.ts comments
- Update _layout.tsx TODO comment
- Remove LiveKit documentation files
- Add interruptIfSpeaking to VoiceContext for TTS interrupt

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

parent 66a8395602, commit 356205d8c0

app.json
@@ -1,6 +1,6 @@
 {
   "expo": {
-    "name": "WellNuo",
+    "name": "WellNuo Lite",
     "slug": "WellNuo",
     "version": "1.0.6",
     "orientation": "portrait",
@@ -55,15 +55,6 @@
       "favicon": "./assets/images/favicon.png"
     },
     "plugins": [
-      [
-        "@livekit/react-native-expo-plugin",
-        {
-          "android": {
-            "audioType": "media"
-          }
-        }
-      ],
-      "@config-plugins/react-native-webrtc",
       [
         "@jamsch/expo-speech-recognition",
         {

_layout.tsx
@@ -19,8 +19,7 @@ export default function TabLayout() {
 
   // Handle voice FAB press - initiate voice call
   const handleVoiceFABPress = useCallback(() => {
-    // TODO: Integrate with LiveKit voice call when ready
-    // For now, show placeholder alert
+    // TODO: Integrate with voice call service when ready
     Alert.alert(
       'Voice Call',
       'Voice call with Julia AI will be available soon.',

VoiceCallContext.tsx
@@ -12,7 +12,7 @@ interface VoiceCallState {
   isActive: boolean;
   // Whether the call UI is minimized (showing bubble instead of full screen)
   isMinimized: boolean;
-  // LiveKit connection details
+  // Voice service connection details
   token: string | undefined;
   wsUrl: string | undefined;
   // Call metadata

VoiceContext.tsx
@@ -267,6 +267,21 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
     [getWellNuoToken]
   );
 
+  /**
+   * Interrupt TTS when user starts speaking
+   * Call this from the STT hook when voice activity is detected
+   */
+  const interruptIfSpeaking = useCallback(() => {
+    if (isSpeaking) {
+      console.log('[VoiceContext] User interrupted - stopping TTS');
+      Speech.stop();
+      setIsSpeaking(false);
+      setStatus('listening');
+      return true;
+    }
+    return false;
+  }, [isSpeaking]);
+
   /**
    * Speak text using TTS
    */
@@ -299,7 +314,7 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
           resolve();
         },
         onStopped: () => {
-          console.log('[VoiceContext] TTS stopped');
+          console.log('[VoiceContext] TTS stopped (interrupted)');
           setIsSpeaking(false);
           setStatus('listening');
           resolve();
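
For reference, a minimal sketch of how an STT hook might drive the new `interruptIfSpeaking` API (the `useSttBargeIn` hook, the `VoiceApi` type, and the `onSpeechStart` callback are illustrative assumptions; only `interruptIfSpeaking` comes from the diff above):

```typescript
import { useCallback } from 'react';

type VoiceApi = {
  /** Returns true when TTS was playing and has now been stopped. */
  interruptIfSpeaking: () => boolean;
};

export function useSttBargeIn(voice: VoiceApi) {
  // Wire this to whatever voice-activity / partial-result event the STT
  // library exposes, so Julia's TTS stops as soon as the user starts talking.
  const onSpeechStart = useCallback(() => {
    if (voice.interruptIfSpeaking()) {
      console.log('[useSttBargeIn] user speech detected - TTS interrupted');
    }
  }, [voice]);

  return { onSpeechStart };
}
```
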
@@ -1,279 +0,0 @@
# Julia AI Voice Integration

## Architecture Overview

```
┌─────────────────────────────────────────────────────────────────┐
│                      WellNuo Lite App (iOS)                      │
│  ┌─────────────────────────────────────────────────────────┐    │
│  │  Voice Call Screen (app/voice-call.tsx)                 │    │
│  │  - useLiveKitRoom hook                                  │    │
│  │  - Audio session management                             │    │
│  │  - Microphone permission handling                       │    │
│  └───────────────────────┬─────────────────────────────────┘    │
│                          │ WebSocket + WebRTC                    │
└──────────────────────────┼──────────────────────────────────────┘
                           │
                           ▼
┌─────────────────────────────────────────────────────────────────┐
│                          LiveKit Cloud                           │
│  ┌─────────────────┐  ┌─────────────────┐  ┌─────────────────┐  │
│  │   SFU Server    │  │    Room Mgmt    │  │  Agent Hosting  │  │
│  │    (WebRTC)     │  │  (Token Auth)   │  │    (Python)     │  │
│  └────────┬────────┘  └─────────────────┘  └────────┬────────┘  │
│           │                                         │           │
│           └─────────────────────────────────────────┘           │
│                          │ Audio Streams                         │
└──────────────────────────┼──────────────────────────────────────┘
                           │
                           ▼
┌─────────────────────────────────────────────────────────────────┐
│                      Julia AI Agent (Python)                     │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────────┐  │
│  │  Deepgram   │  │  Deepgram   │  │  WellNuo voice_ask API  │  │
│  │     STT     │  │     TTS     │  │  (Custom LLM backend)   │  │
│  │  (Nova-2)   │  │   (Aura)    │  │                         │  │
│  └─────────────┘  └─────────────┘  └─────────────────────────┘  │
└─────────────────────────────────────────────────────────────────┘
```

## Components

### 1. React Native Client

**Location:** `app/voice-call.tsx`, `hooks/useLiveKitRoom.ts`

**Dependencies:**
- `@livekit/react-native` - LiveKit React Native SDK
- `@livekit/react-native-webrtc` - WebRTC for React Native
- `expo-av` - Audio session management

**Key Features:**
- Connects to LiveKit room with JWT token
- Manages audio session (activates speaker mode)
- Handles microphone permissions
- Displays connection state and transcription

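A minimal connection sketch for context, assuming `livekit-client`'s `Room` API and an illustrative `joinJuliaRoom` helper (the token and URL come from the WellNuo backend, as described under "Getting Tokens" below):

```typescript
import { registerGlobals } from '@livekit/react-native';
import { Room, RoomEvent } from 'livekit-client';

let globalsRegistered = false;

export async function joinJuliaRoom(wsUrl: string, token: string): Promise<Room> {
  // WebRTC globals must be installed before any Room is used
  // (in the app this should run once at startup).
  if (!globalsRegistered) {
    registerGlobals();
    globalsRegistered = true;
  }

  const room = new Room();
  room.on(RoomEvent.Disconnected, () => {
    console.log('[VoiceCall] disconnected from room');
  });

  await room.connect(wsUrl, token);
  // Publish the local microphone so the agent can hear the user.
  await room.localParticipant.setMicrophoneEnabled(true);
  return room;
}
```
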
### 2. LiveKit Cloud

**Project:** `live-kit-demo-70txlh6a`
**Agent ID:** `CA_Yd3qcuYEVKKE`

**Configuration:**
- Auto-scaling agent workers
- Managed STT/TTS through inference endpoints
- Built-in noise cancellation

**Getting Tokens:**
```typescript
// From WellNuo backend
const response = await fetch('/api/livekit/token', {
  method: 'POST',
  body: JSON.stringify({ roomName, userName })
});
const { token, url } = await response.json();
```

### 3. Julia AI Agent (Python)

**Location:** `julia-agent/julia-ai/src/agent.py`

**Stack:**
- LiveKit Agents SDK
- Deepgram Nova-2 (STT)
- Deepgram Aura Asteria (TTS - female voice)
- Silero VAD (Voice Activity Detection)
- Custom WellNuo LLM (voice_ask API)

## Setup & Deployment

### Prerequisites

1. **LiveKit Cloud Account**
   - Sign up at https://cloud.livekit.io/
   - Create a project
   - Get API credentials

2. **LiveKit CLI**
   ```bash
   # macOS
   brew install livekit-cli

   # Login
   lk cloud auth
   ```

### Agent Deployment

1. **Navigate to agent directory:**
   ```bash
   cd julia-agent/julia-ai
   ```

2. **Install dependencies:**
   ```bash
   uv sync
   ```

3. **Configure environment:**
   ```bash
   cp .env.example .env.local
   # Add LIVEKIT_URL, LIVEKIT_API_KEY, LIVEKIT_API_SECRET
   ```

4. **Local development:**
   ```bash
   uv run python src/agent.py dev
   ```

5. **Deploy to LiveKit Cloud:**
   ```bash
   lk agent deploy
   ```

### React Native Setup

1. **Install packages:**
   ```bash
   npm install @livekit/react-native @livekit/react-native-webrtc
   ```

2. **iOS permissions (Info.plist):**
   ```xml
   <key>NSMicrophoneUsageDescription</key>
   <string>WellNuo needs microphone access for voice calls with Julia AI</string>
   ```

3. **Pod install:**
   ```bash
   cd ios && pod install
   ```

## Flow Diagram

```
User opens Voice tab
        │
        ▼
Request microphone permission
        │
        ├─ Denied → Show error
        │
        ▼
Get LiveKit token from WellNuo API
        │
        ▼
Connect to LiveKit room
        │
        ▼
Agent joins automatically (LiveKit Cloud)
        │
        ▼
Agent sends greeting (TTS)
        │
        ▼
User speaks → STT → WellNuo API → Response → TTS
        │
        ▼
User ends call → Disconnect from room
```

## API Integration

### WellNuo voice_ask API

The agent uses WellNuo's `voice_ask` API to get contextual responses about the beneficiary.

**Endpoint:** `https://eluxnetworks.net/function/well-api/api`

**Authentication:**
```python
data = {
    "function": "credentials",
    "clientId": "001",
    "user_name": WELLNUO_USER,
    "ps": WELLNUO_PASSWORD,
    "nonce": str(random.randint(0, 999999)),
}
```

**Voice Ask:**
```python
data = {
    "function": "voice_ask",
    "clientId": "001",
    "user_name": WELLNUO_USER,
    "token": token,
    "question": user_message,
    "deployment_id": DEPLOYMENT_ID,
}
```

## Troubleshooting

### Common Issues

1. **No audio playback on iOS**
   - Check audio session configuration
   - Ensure `expo-av` is properly configured
   - Test on real device (simulator has audio limitations)

2. **Microphone not working**
   - Verify permissions in Info.plist
   - Check if user granted permission
   - Real device required for full audio testing

3. **Agent not responding**
   - Check agent logs: `lk agent logs`
   - Verify LIVEKIT credentials
   - Check WellNuo API connectivity

4. **Connection fails**
   - Verify token is valid
   - Check network connectivity
   - Ensure LiveKit URL is correct

### Debugging

```bash
# View agent logs
lk agent logs

# View specific deployment logs
lk agent logs --version v20260119031418

# Check agent status
lk agent list
```

## Environment Variables

### Agent (.env.local)
```
LIVEKIT_URL=wss://live-kit-demo-70txlh6a.livekit.cloud
LIVEKIT_API_KEY=your-api-key
LIVEKIT_API_SECRET=your-api-secret
WELLNUO_USER=anandk
WELLNUO_PASSWORD=anandk_8
DEPLOYMENT_ID=21
```

### React Native (via WellNuo backend)
Token generation handled server-side for security.

## Status

**Current State:** WIP - Not tested on real device

**Working:**
- Agent deploys to LiveKit Cloud
- Agent connects to rooms
- STT/TTS pipeline configured
- WellNuo API integration
- React Native UI

**Needs Testing:**
- Real device microphone capture
- Audio playback on physical iOS device
- Full conversation loop end-to-end
- Token refresh/expiration handling

callManager.ts
@@ -3,8 +3,6 @@
  *
  * Ensures only ONE voice call can be active at a time per device.
  * If a new call is started while another is active, the old one is disconnected first.
- *
- * This addresses the LiveKit concurrent agent jobs limit (5 per project).
  */
 
 type DisconnectCallback = () => Promise<void>;
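
A sketch of the single-active-call policy described in that comment (only `DisconnectCallback` is from the file; `registerCall` and `ensureNoActiveCall` are hypothetical names):

```typescript
type DisconnectCallback = () => Promise<void>;

let activeDisconnect: DisconnectCallback | null = null;

/** Disconnect whatever call is currently active, if any. */
export async function ensureNoActiveCall(): Promise<void> {
  if (activeDisconnect) {
    const disconnect = activeDisconnect;
    activeDisconnect = null;
    await disconnect();
  }
}

/** Register the disconnect handler for a newly started call. */
export async function registerCall(disconnect: DisconnectCallback): Promise<void> {
  await ensureNoActiveCall(); // the old call goes down before the new one starts
  activeDisconnect = disconnect;
}
```
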
@@ -1,336 +0,0 @@
# FEATURE-002: LiveKit Voice Call with Julia AI

## Summary

A full voice call with Julia AI through LiveKit Cloud. The user taps the "Start Voice Call" button, a phone-style call screen opens, and they can talk to Julia AI by voice.

## Status: 🔴 Not Started (full rework required)

## Priority: Critical

## Problem Statement

The current implementation has the following problems:
1. **STT (Speech-to-Text) is unreliable**: the microphone is sometimes detected, sometimes not
2. **TTS works**: Julia's voice is audible
3. **The code is complex and tangled**: lots of legacy code, polyfills, and hacks
4. **No clear architecture**: everything lives in one file, voice-call.tsx

## Root Cause Analysis

### Why the microphone is unreliable:
1. **iOS AudioSession**: incorrect configuration, or a race condition during setup
2. **registerGlobals()**: the WebRTC polyfills may not be initialized in time
3. **Permissions**: microphone access may not be granted, or the mic may be held by another process
4. **Event handling**: LiveKit events can get lost

### What works:
- LiveKit Cloud connection ✅
- Token generation ✅
- TTS (Deepgram Asteria) ✅
- Backend agent (Julia AI) ✅

---

## Architecture

### System Overview

```
┌─────────────────────────────────────────────────────────────────────┐
│                        WellNuo Lite App (iOS)                        │
├─────────────────────────────────────────────────────────────────────┤
│                                                                      │
│  ┌──────────────┐    ┌──────────────────┐    ┌──────────────────┐   │
│  │  Voice Tab   │───▶│  VoiceCallScreen │───▶│   LiveKit Room   │   │
│  │   (entry)    │    │   (fullscreen)   │    │     (WebRTC)     │   │
│  └──────────────┘    └──────────────────┘    └──────────────────┘   │
│                              │                        │             │
│                              ▼                        ▼             │
│                      ┌──────────────┐        ┌──────────────┐       │
│                      │useLiveKitRoom│        │ AudioSession │       │
│                      │    (hook)    │        │ (iOS native) │       │
│                      └──────────────┘        └──────────────┘       │
│                                                                      │
└─────────────────────────────────────────────────────────────────────┘
                                │
                                │ WebSocket + WebRTC
                                ▼
┌─────────────────────────────────────────────────────────────────────┐
│                             LiveKit Cloud                            │
├─────────────────────────────────────────────────────────────────────┤
│  Room: wellnuo-{userId}-{timestamp}                                  │
│  Participants: user + julia-agent                                    │
│  Audio Tracks: bidirectional                                         │
└─────────────────────────────────────────────────────────────────────┘
                                │
                                │ Agent dispatch
                                ▼
┌─────────────────────────────────────────────────────────────────────┐
│                         Julia AI Agent (Python)                      │
├─────────────────────────────────────────────────────────────────────┤
│  STT: Deepgram Nova-2                                                │
│  LLM: WellNuo voice_ask API                                          │
│  TTS: Deepgram Aura Asteria                                          │
│  Framework: LiveKit Agents SDK 1.3.11                                │
└─────────────────────────────────────────────────────────────────────┘
```

### Data Flow

```
User speaks → iOS Mic → WebRTC → LiveKit Cloud → Agent → Deepgram STT
                                                              │
                                                              ▼
                                                      WellNuo API (LLM)
                                                              │
                                                              ▼
Agent receives text ← LiveKit Cloud ← WebRTC ← Deepgram TTS (audio)
        │
        ▼
iOS Speaker → User hears Julia
```

---

## Technical Requirements

### Dependencies (package.json)

```json
{
  "@livekit/react-native": "^2.x",
  "livekit-client": "^2.x",
  "expo-keep-awake": "^14.x"
}
```

### iOS Permissions (app.json)

```json
{
  "ios": {
    "infoPlist": {
      "NSMicrophoneUsageDescription": "WellNuo needs microphone access for voice calls with Julia AI",
      "UIBackgroundModes": ["audio", "voip"]
    }
  }
}
```

### Token Server (already exists)

- **URL**: `https://wellnuo.smartlaunchhub.com/julia/token`
- **Method**: POST
- **Body**: `{ "userId": "string" }`
- **Response**: `{ "success": true, "data": { "token", "roomName", "wsUrl" } }`

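A fetch sketch against this token server, following the request/response shape listed above (`fetchJuliaToken` is an illustrative name, not an existing service function):

```typescript
interface TokenResponse {
  success: boolean;
  data: { token: string; roomName: string; wsUrl: string };
}

export async function fetchJuliaToken(userId: string): Promise<TokenResponse['data']> {
  const res = await fetch('https://wellnuo.smartlaunchhub.com/julia/token', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ userId }),
  });
  if (!res.ok) {
    throw new Error(`Token request failed: ${res.status}`);
  }
  const json = (await res.json()) as TokenResponse;
  if (!json.success) {
    throw new Error('Token server returned success=false');
  }
  return json.data;
}
```
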
---

## Implementation Steps

### Phase 1: Cleanup (DELETE old code)

- [ ] 1.1. Delete `app/voice-call.tsx` (current broken implementation)
- [ ] 1.2. Keep `app/(tabs)/voice.tsx` (entry point) but simplify
- [ ] 1.3. Keep `services/livekitService.ts` (token fetching)
- [ ] 1.4. Keep `contexts/VoiceTranscriptContext.tsx` (transcript storage)
- [ ] 1.5. Delete `components/VoiceIndicator.tsx` (unused)
- [ ] 1.6. Delete `polyfills/livekit-globals.ts` (not needed with proper setup)

### Phase 2: New Architecture

- [ ] 2.1. Create `hooks/useLiveKitRoom.ts` — encapsulate all LiveKit logic
- [ ] 2.2. Create `app/voice-call.tsx` — simple UI component using the hook
- [ ] 2.3. Create `utils/audioSession.ts` — iOS AudioSession helper

### Phase 3: useLiveKitRoom Hook

**File**: `hooks/useLiveKitRoom.ts`

```typescript
interface UseLiveKitRoomOptions {
  userId: string;
  onTranscript?: (role: 'user' | 'assistant', text: string) => void;
}

interface UseLiveKitRoomReturn {
  // Connection state
  state: 'idle' | 'connecting' | 'connected' | 'reconnecting' | 'disconnected' | 'error';
  error: string | null;

  // Call info
  roomName: string | null;
  callDuration: number; // seconds

  // Audio state
  isMuted: boolean;
  isSpeaking: boolean; // agent is speaking

  // Actions
  connect: () => Promise<void>;
  disconnect: () => Promise<void>;
  toggleMute: () => void;
}
```

**Implementation requirements**:
1. MUST call `registerGlobals()` BEFORE importing `livekit-client`
2. MUST configure iOS AudioSession BEFORE connecting to room
3. MUST handle all RoomEvents properly
4. MUST cleanup on unmount (disconnect, stop audio session)
5. MUST handle background/foreground transitions

### Phase 4: iOS AudioSession Configuration

**Critical for microphone to work!**

```typescript
// utils/audioSession.ts
import { AudioSession } from '@livekit/react-native';
import { Platform } from 'react-native';

export async function configureAudioForVoiceCall(): Promise<void> {
  if (Platform.OS !== 'ios') return;

  // Step 1: Set Apple audio configuration
  await AudioSession.setAppleAudioConfiguration({
    audioCategory: 'playAndRecord',
    audioCategoryOptions: [
      'allowBluetooth',
      'allowBluetoothA2DP',
      'defaultToSpeaker',
      'mixWithOthers',
    ],
    audioMode: 'voiceChat',
  });

  // Step 2: Configure output
  await AudioSession.configureAudio({
    ios: {
      defaultOutput: 'speaker',
    },
  });

  // Step 3: Start session
  await AudioSession.startAudioSession();
}

export async function stopAudioSession(): Promise<void> {
  if (Platform.OS !== 'ios') return;
  await AudioSession.stopAudioSession();
}
```

### Phase 5: Voice Call Screen UI

**File**: `app/voice-call.tsx`

Simple, clean UI:
- Avatar with Julia "J" letter
- Call duration timer
- Status text (Connecting... / Connected / Julia is speaking...)
- Mute button
- End call button
- Debug logs toggle (for development)

**NO complex logic in this file** — all LiveKit logic in the hook!

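A sketch of how thin the screen can stay when it only consumes the hook (the imports and the `@/hooks` path are assumptions for illustration; the hook API is the `UseLiveKitRoomReturn` interface from Phase 3):

```typescript
import React, { useEffect } from 'react';
import { Button, Text, View } from 'react-native';
import { useLiveKitRoom } from '@/hooks/useLiveKitRoom';

export default function VoiceCallScreen({ userId }: { userId: string }) {
  const { state, callDuration, isSpeaking, isMuted, connect, disconnect, toggleMute } =
    useLiveKitRoom({ userId });

  // Connect once when the screen mounts; the hook owns everything else.
  useEffect(() => {
    void connect();
  }, [connect]);

  return (
    <View>
      <Text>J</Text>
      <Text>{state === 'connected' ? `${callDuration}s` : state}</Text>
      {isSpeaking && <Text>Julia is speaking...</Text>}
      <Button title={isMuted ? 'Unmute' : 'Mute'} onPress={toggleMute} />
      <Button title="End call" onPress={() => void disconnect()} />
    </View>
  );
}
```
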
### Phase 6: Testing Checklist

- [ ] 6.1. Fresh app launch → Start call → Can hear Julia greeting
- [ ] 6.2. Speak → Julia responds → Conversation works
- [ ] 6.3. Mute → Unmute → Still works
- [ ] 6.4. End call → Clean disconnect
- [ ] 6.5. App to background → Audio continues
- [ ] 6.6. App to foreground → Still connected
- [ ] 6.7. Multiple calls in a row → No memory leaks
- [ ] 6.8. No microphone permission → Shows error

---

## Files to Create/Modify

| File | Action | Description |
|------|--------|-------------|
| `hooks/useLiveKitRoom.ts` | CREATE | Main LiveKit hook with all logic |
| `utils/audioSession.ts` | CREATE | iOS AudioSession helpers |
| `app/voice-call.tsx` | REPLACE | Simple UI using the hook |
| `app/(tabs)/voice.tsx` | SIMPLIFY | Just entry point, remove debug UI |
| `services/livekitService.ts` | KEEP | Token fetching (already works) |
| `contexts/VoiceTranscriptContext.tsx` | KEEP | Transcript storage |
| `components/VoiceIndicator.tsx` | DELETE | Not needed |
| `polyfills/livekit-globals.ts` | DELETE | Not needed |

---

## Key Principles

### 1. Separation of Concerns
- **Hook** handles ALL LiveKit/WebRTC logic
- **Screen** only renders UI based on hook state
- **Utils** for platform-specific code (AudioSession)

### 2. Proper Initialization Order
```
1. registerGlobals() — WebRTC polyfills
2. configureAudioForVoiceCall() — iOS audio
3. getToken() — fetch from server
4. room.connect() — connect to LiveKit
5. room.localParticipant.setMicrophoneEnabled(true) — enable mic
```

### 3. Proper Cleanup Order
```
1. room.disconnect() — leave room
2. stopAudioSession() — release iOS audio
3. Clear all refs and state
```

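A sketch of how the hook could enforce this cleanup order on unmount (the hook name and the `@/utils` import path are assumptions):

```typescript
import { useEffect, useRef } from 'react';
import type { Room } from 'livekit-client';
import { stopAudioSession } from '@/utils/audioSession';

export function useCallCleanup() {
  const roomRef = useRef<Room | null>(null);

  useEffect(() => {
    return () => {
      void (async () => {
        await roomRef.current?.disconnect(); // 1. leave the room
        await stopAudioSession();            // 2. release the iOS audio session
        roomRef.current = null;              // 3. clear refs and state
      })();
    };
  }, []);

  return roomRef;
}
```
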
### 4. Error Handling
- Every async operation wrapped in try/catch
- User-friendly error messages
- Automatic retry for network issues
- Graceful degradation

---

## Success Criteria

1. ✅ User can start voice call and hear Julia greeting
2. ✅ User can speak and Julia understands (STT works reliably)
3. ✅ Julia responds with voice (TTS works)
4. ✅ Conversation can continue back and forth
5. ✅ Mute/unmute works
6. ✅ End call cleanly disconnects
7. ✅ No console errors or warnings
8. ✅ Works on iOS device (not just simulator)

---

## Related Links

- [LiveKit React Native SDK](https://docs.livekit.io/client-sdk-js/react-native/)
- [LiveKit Agents Python](https://docs.livekit.io/agents/)
- [Deepgram STT/TTS](https://deepgram.com/)
- [iOS AVAudioSession](https://developer.apple.com/documentation/avfaudio/avaudiosession)

---

## Notes

### Why the previous approach failed:

1. **Too much code in one file** — voice-call.tsx had 900+ lines with all logic mixed
2. **Polyfills applied wrong** — Event class polyfill was inside the component
3. **AudioSession configured too late** — sometimes after connect() already started
4. **No proper error boundaries** — errors silently failed
5. **Race conditions** — multiple async operations without proper sequencing

### What's different this time:

1. **Hook-based architecture** — single source of truth for state
2. **Proper initialization sequence** — documented and enforced
3. **Clean separation** — UI knows nothing about WebRTC
4. **Comprehensive logging** — every step logged for debugging
5. **Test-driven** — write tests before implementation

utils/audioSession.ts
@@ -1,381 +0,0 @@
/**
 * Audio Session Configuration Helpers (iOS + Android)
 *
 * CRITICAL: This must be configured BEFORE connecting to LiveKit room!
 * Without proper AudioSession setup, microphone won't work on iOS.
 * On Android, this controls speaker/earpiece routing.
 */

import { Platform } from 'react-native';

/**
 * Represents an available audio output device
 */
export interface AudioOutputDevice {
  id: string;
  name: string;
  type: 'speaker' | 'earpiece' | 'bluetooth' | 'headphones' | 'unknown';
}

// AudioSession module - use 'any' to avoid complex typing issues with @livekit/react-native
// The actual AudioSession from LiveKit has specific enum types that are hard to match statically
let audioSessionModule: any = null;

/**
 * Import AudioSession module lazily
 * This is needed because @livekit/react-native must be imported after registerGlobals()
 */
async function getAudioSession(): Promise<any | null> {
  if (!audioSessionModule) {
    const livekit = await import('@livekit/react-native');
    audioSessionModule = livekit.AudioSession;
  }

  return audioSessionModule;
}

/**
 * Configure AudioSession for bidirectional voice call (iOS + Android)
 *
 * MUST be called BEFORE connecting to LiveKit room!
 *
 * iOS Configuration:
 * - Category: playAndRecord (both speaker and mic)
 * - Mode: voiceChat (optimized for voice calls)
 * - Options: Bluetooth, speaker, mix with others
 *
 * Android Configuration:
 * - audioTypeOptions: communication (for voice calls)
 * - forceHandleAudioRouting: true (to control speaker/earpiece)
 */
export async function configureAudioForVoiceCall(): Promise<void> {
  console.log(`[AudioSession] Configuring for voice call on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    if (Platform.OS === 'ios') {
      // iOS-specific configuration - FORCE SPEAKER OUTPUT
      // Using videoChat mode + defaultSpeakerOutput option for guaranteed speaker
      console.log('[AudioSession] Configuring iOS for SPEAKER output...');

      try {
        // Primary config: videoChat mode with defaultSpeakerOutput
        await AudioSession.setAppleAudioConfiguration({
          audioCategory: 'playAndRecord',
          audioCategoryOptions: [
            'allowBluetooth',
            'mixWithOthers',
            'defaultToSpeaker', // KEY: Forces speaker as default output
          ],
          audioMode: 'videoChat', // videoChat mode uses speaker by default
        });
        console.log('[AudioSession] iOS videoChat + defaultToSpeaker configured!');
      } catch (err) {
        console.warn('[AudioSession] Primary iOS config failed, trying fallback:', err);
        // Fallback: just videoChat without defaultToSpeaker option
        await AudioSession.setAppleAudioConfiguration({
          audioCategory: 'playAndRecord',
          audioCategoryOptions: ['allowBluetooth', 'mixWithOthers'],
          audioMode: 'videoChat',
        });
      }

      console.log('[AudioSession] Starting iOS audio session...');
      await AudioSession.startAudioSession();

      // Additionally set default output to speaker (belt and suspenders)
      try {
        console.log('[AudioSession] Setting iOS default output to speaker...');
        await AudioSession.configureAudio({
          ios: {
            defaultOutput: 'speaker',
          },
        });
        console.log('[AudioSession] iOS speaker output set!');
      } catch (outputErr) {
        console.warn('[AudioSession] Could not set speaker output:', outputErr);
      }
    } else if (Platform.OS === 'android') {
      // ============================================================
      // HYPOTHESIS 2: audioStreamType = 'music' (instead of 'voiceCall')
      // Theory: STREAM_VOICE_CALL routes to earpiece, STREAM_MUSIC to speaker
      // ============================================================
      console.log('[AudioSession] === HYPOTHESIS 2: audioStreamType = music ===');

      await AudioSession.configureAudio({
        android: {
          audioTypeOptions: {
            manageAudioFocus: true,
            audioMode: 'inCommunication', // DEFAULT
            audioFocusMode: 'gain',
            audioStreamType: 'music', // <-- CHANGED from 'voiceCall'
            audioAttributesUsageType: 'voiceCommunication', // DEFAULT
            audioAttributesContentType: 'speech', // DEFAULT
          },
          preferredOutputList: ['speaker'],
        },
      });

      console.log('[AudioSession] Starting Android audio session...');
      await AudioSession.startAudioSession();
      console.log('[AudioSession] Android audio session STARTED');
    }

    console.log('[AudioSession] Configuration complete!');
  } catch (error) {
    console.error('[AudioSession] Configuration error:', error);
    throw error;
  }
}

/**
 * Stop AudioSession (iOS + Android)
 *
 * Should be called when disconnecting from voice call
 */
export async function stopAudioSession(): Promise<void> {
  if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
    return;
  }

  console.log(`[AudioSession] Stopping audio session on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      return;
    }

    await AudioSession.stopAudioSession();
    console.log('[AudioSession] Stopped');
  } catch (error) {
    console.error('[AudioSession] Error stopping:', error);
    // Don't throw - cleanup errors are not critical
  }
}

/**
 * Reconfigure audio session after remote track arrives (iOS + Android)
 *
 * Sometimes the OS needs a kick to properly route audio after remote participant joins
 */
export async function reconfigureAudioForPlayback(): Promise<void> {
  if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
    return;
  }

  console.log(`[AudioSession] Reconfiguring for playback (SPEAKER) on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      return;
    }

    if (Platform.OS === 'ios') {
      // Reconfigure iOS - force speaker output
      await AudioSession.setAppleAudioConfiguration({
        audioCategory: 'playAndRecord',
        audioCategoryOptions: [
          'allowBluetooth',
          'mixWithOthers',
          'defaultToSpeaker', // Force speaker
        ],
        audioMode: 'videoChat', // videoChat = speaker by default
      });

      // Also set default output to speaker
      await AudioSession.configureAudio({
        ios: {
          defaultOutput: 'speaker',
        },
      });
      console.log('[AudioSession] iOS reconfigured for speaker playback');
    } else if (Platform.OS === 'android') {
      // Reconfigure Android audio to ensure speaker output
      // Using inCommunication + music stream for reliable speaker routing
      await AudioSession.configureAudio({
        android: {
          audioTypeOptions: {
            manageAudioFocus: true,
            audioMode: 'normal', // Use normal mode to keep speaker
            audioFocusMode: 'gain',
            audioStreamType: 'music',
            audioAttributesUsageType: 'media',
            audioAttributesContentType: 'music',
          },
          preferredOutputList: ['speaker'],
          forceHandleAudioRouting: true,
        },
      });

      // CRITICAL: Force speaker via selectAudioOutput
      try {
        await AudioSession.selectAudioOutput('speaker');
        console.log('[AudioSession] Android selectAudioOutput(speaker) SUCCESS!');
      } catch (e) {
        console.warn('[AudioSession] selectAudioOutput failed:', e);
      }

      console.log('[AudioSession] Android reconfigured for SPEAKER playback');
    }

    console.log('[AudioSession] Reconfigured successfully');
  } catch (error) {
    console.error('[AudioSession] Reconfigure error:', error);
    // Don't throw - this is a best-effort operation
  }
}

/**
 * Get list of available audio output devices
 *
 * @returns Array of available audio output devices
 */
export async function getAvailableAudioOutputs(): Promise<AudioOutputDevice[]> {
  console.log(`[AudioSession] Getting available audio outputs on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return [];
    }

    const outputs = await AudioSession.getAudioOutputs();
    console.log('[AudioSession] Available outputs:', outputs);

    // Map the raw outputs to our AudioOutputDevice interface
    if (Array.isArray(outputs)) {
      return outputs.map((output: any) => ({
        id: output.id || output.deviceId || String(output),
        name: output.name || output.deviceName || String(output),
        type: mapDeviceType(output.type || output.deviceType),
      }));
    }

    return [];
  } catch (error) {
    console.error('[AudioSession] getAvailableAudioOutputs error:', error);
    return [];
  }
}

/**
 * Select a specific audio output device by ID
 *
 * @param deviceId - The ID of the device to select
 */
export async function selectAudioOutput(deviceId: string): Promise<void> {
  console.log(`[AudioSession] Selecting audio output: ${deviceId} on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    await AudioSession.selectAudioOutput(deviceId);
    console.log(`[AudioSession] Audio output selected: ${deviceId}`);
  } catch (error) {
    console.error('[AudioSession] selectAudioOutput error:', error);
  }
}

/**
 * Map raw device type to our AudioOutputDevice type
 */
function mapDeviceType(rawType: string | undefined): AudioOutputDevice['type'] {
  if (!rawType) return 'unknown';

  const type = rawType.toLowerCase();
  if (type.includes('speaker')) return 'speaker';
  if (type.includes('earpiece') || type.includes('receiver')) return 'earpiece';
  if (type.includes('bluetooth')) return 'bluetooth';
  if (type.includes('headphone') || type.includes('headset') || type.includes('wired')) return 'headphones';

  return 'unknown';
}

/**
 * Switch audio output between speaker and earpiece (iOS + Android)
 *
 * @param useSpeaker - true for speaker, false for earpiece
 */
export async function setAudioOutput(useSpeaker: boolean): Promise<void> {
  console.log(`[AudioSession] Setting audio output to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'} on ${Platform.OS}...`);

  try {
    const AudioSession = await getAudioSession();
    if (!AudioSession) {
      console.error('[AudioSession] Failed to get AudioSession module');
      return;
    }

    if (Platform.OS === 'ios') {
      // iOS: Use selectAudioOutput with force_speaker
      try {
        await AudioSession.selectAudioOutput(useSpeaker ? 'force_speaker' : 'default');
        console.log(`[AudioSession] iOS selectAudioOutput: ${useSpeaker ? 'force_speaker' : 'default'}`);
      } catch (e) {
        console.warn('[AudioSession] selectAudioOutput failed, using fallback config');
      }

      // Also configure audio mode
      await AudioSession.setAppleAudioConfiguration({
        audioCategory: 'playAndRecord',
        audioCategoryOptions: useSpeaker
          ? ['allowBluetooth', 'mixWithOthers', 'defaultToSpeaker']
          : ['allowBluetooth', 'mixWithOthers'],
        audioMode: useSpeaker ? 'videoChat' : 'voiceChat',
      });

      // Also set default output
      await AudioSession.configureAudio({
        ios: {
          defaultOutput: useSpeaker ? 'speaker' : 'earpiece',
        },
      });
    } else if (Platform.OS === 'android') {
      // Android: Use selectAudioOutput DIRECTLY - this calls setSpeakerphoneOn()
      // This is the MOST RELIABLE way to force speaker on Android!
      try {
        await AudioSession.selectAudioOutput(useSpeaker ? 'speaker' : 'earpiece');
        console.log(`[AudioSession] Android selectAudioOutput: ${useSpeaker ? 'speaker' : 'earpiece'}`);
      } catch (e) {
        console.warn('[AudioSession] selectAudioOutput failed:', e);
      }

      // Also reconfigure audio settings as backup
      await AudioSession.configureAudio({
        android: {
          audioTypeOptions: {
            manageAudioFocus: true,
            audioMode: useSpeaker ? 'normal' : 'inCommunication',
            audioFocusMode: 'gain',
            audioStreamType: useSpeaker ? 'music' : 'voiceCall',
            audioAttributesUsageType: useSpeaker ? 'media' : 'voiceCommunication',
            audioAttributesContentType: useSpeaker ? 'music' : 'speech',
          },
          preferredOutputList: useSpeaker ? ['speaker'] : ['earpiece'],
          forceHandleAudioRouting: true,
        },
      });
    }

    console.log(`[AudioSession] Audio output set to ${useSpeaker ? 'SPEAKER' : 'EARPIECE'}`);
  } catch (error) {
    console.error('[AudioSession] setAudioOutput error:', error);
  }
}
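
For reference, a usage sketch of how these deleted helpers were meant to be sequenced around a call (the wrapper function names are hypothetical; the ordering follows the comments in the file above):

```typescript
import {
  configureAudioForVoiceCall,
  reconfigureAudioForPlayback,
  stopAudioSession,
} from './audioSession';

// Before room.connect(): put the device into play-and-record mode.
export async function prepareCallAudio(): Promise<void> {
  await configureAudioForVoiceCall();
}

// When the agent's audio track arrives: nudge routing back to the speaker.
export async function onRemoteAudioSubscribed(): Promise<void> {
  await reconfigureAudioForPlayback();
}

// After room.disconnect(): release the audio session.
export async function releaseCallAudio(): Promise<void> {
  await stopAudioSession();
}
```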