Compare commits


5 Commits

Author SHA1 Message Date
9f12830850 Improve STT quality and add session/chat management
- Switch Android STT from on-device to cloud recognition for better accuracy
- Add lastMessageWasVoiceRef to prevent TTS for text-typed messages
- Stop voice session and clear chat when changing Deployment or Voice API
- Ensures clean state when switching between beneficiaries/models
2026-01-29 18:29:00 -08:00
5174366384 Remove Julia greeting on voice call start
Users expect to speak immediately after pressing the call button,
like any messaging app. The greeting was interrupting users who
started talking right away.

Now Julia will only respond after the user speaks first.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-29 16:37:05 -08:00
a1ff324a5a Increase Android STT silence timeout from 2s to 4s
Fix premature speech cutoff during natural pauses:
- EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 4000ms (was 2000ms)
- EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 3000ms (was 1500ms)

This allows users to pause between sentences without being cut off.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-29 16:34:53 -08:00
4b91aba08e Fix premature speech cutoff in Julia AI voice agent
- Increase Silero VAD min_silence_duration from 0.9s to 1.8s to allow natural pauses
- Lower activation_threshold from 0.4 to 0.35 for better quiet speaker detection
- Increase padding_duration from 0.3s to 0.5s to capture soft word endings
- Add Deepgram STT endpointing=1500ms to prevent early transcript finalization
- Add Deepgram utterance_end_ms=2000ms for slow speakers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-29 16:33:28 -08:00
a1e30939a6 Fix race condition with AbortController in VoiceContext
Problem:
- Multiple rapid calls to sendTranscript() created race conditions
- Old requests continued using local abortController variable
- Responses from superseded requests could still be processed
- Session stop didn't reliably prevent pending responses

Solution:
- Changed abort checks from `abortController.signal.aborted` to
  `abortControllerRef.current !== abortController`
- Each request now checks whether it is still the active one, not merely whether it was aborted
- Added checks at 4 critical points: before API call, after API call,
  before retry, and after retry

Changes:
- VoiceContext.tsx:268 - Check before initial API call
- VoiceContext.tsx:308 - Check after API response
- VoiceContext.tsx:344 - Check before retry
- VoiceContext.tsx:359 - Check after retry response

Testing:
- Added Jest test configuration
- Added test suite with 5 race condition scenarios
- Added manual testing documentation
- Verified with TypeScript linting (no new errors)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-29 11:03:08 -08:00
13 changed files with 7891 additions and 78 deletions

View File

@@ -27,7 +27,7 @@
"bitcode": false
},
"android": {
"package": "com.wellnuo.app",
"package": "com.wellnuo.BluetoothScanner",
"softwareKeyboardLayoutMode": "resize",
"adaptiveIcon": {
"backgroundColor": "#E6F4FE",
@@ -56,7 +56,7 @@
},
"plugins": [
[
"@jamsch/expo-speech-recognition",
"expo-speech-recognition",
{
"microphonePermission": "WellNuo needs access to your microphone to listen to your voice commands.",
"speechRecognitionPermission": "WellNuo uses speech recognition to convert your voice to text for Julia AI."

View File

@@ -261,8 +261,8 @@ export default function TabLayout() {
// Delay to let TTS fully release audio focus, then restart STT
// iOS: 300ms for smooth audio fade
// Android: 50ms (Audio Focus releases immediately)
const delay = Platform.OS === 'android' ? 50 : 300;
// Android: 0ms - start immediately to catch first words (Audio Focus releases instantly)
const delay = Platform.OS === 'android' ? 0 : 300;
console.log(`${platformPrefix} [TabLayout] ⏱️ Waiting ${delay}ms before restarting STT (audio focus release)`);
const timer = setTimeout(() => {

View File

@@ -157,6 +157,9 @@ export default function ChatScreen() {
}
}, [voicePartial, voiceIsListening]);
// Track if current message was sent via voice (to decide whether to speak response)
const lastMessageWasVoiceRef = useRef(false);
// Clear input when voice switches to processing (transcript was sent)
const prevVoiceStatusRef = useRef(voiceStatus);
useEffect(() => {
@@ -164,6 +167,8 @@
prevVoiceStatusRef.current = voiceStatus;
if (prev === 'listening' && voiceStatus === 'processing') {
setInput('');
// Mark that this message was sent via voice
lastMessageWasVoiceRef.current = true;
}
}, [voiceStatus]);
@@ -187,34 +192,43 @@
}, [])
);
// When deployment ID changes, end call and clear chat
// Track previous value to detect actual changes (not just re-renders)
// When deployment ID changes BY USER ACTION, clear chat
// Track previous value to detect actual changes (not just initial load)
const previousDeploymentIdRef = useRef<string | null | undefined>(undefined);
// Track if we've done initial load (to distinguish from user changing deployment)
const initialLoadDoneRef = useRef(false);
useEffect(() => {
// undefined means "not yet initialized" - store current value and skip
// First render - just store the value, don't clear anything
if (previousDeploymentIdRef.current === undefined) {
console.log('[Chat] Initializing deployment tracking:', customDeploymentId, 'name:', deploymentName);
console.log('[Chat] First render, storing deployment:', customDeploymentId);
previousDeploymentIdRef.current = customDeploymentId;
// Update initial message with deployment name if we have one
if (customDeploymentId || deploymentName) {
setMessages([createInitialMessage(deploymentName)]);
}
return;
}
// Check if deployment actually changed
if (previousDeploymentIdRef.current !== customDeploymentId) {
console.log('[Chat] Deployment changed!', {
// Initial async load completed (null → actual value) - don't clear chat!
if (!initialLoadDoneRef.current && previousDeploymentIdRef.current === null && customDeploymentId) {
console.log('[Chat] Initial deployment load complete:', customDeploymentId, '- NOT clearing chat');
previousDeploymentIdRef.current = customDeploymentId;
initialLoadDoneRef.current = true;
return;
}
// Mark initial load as done
if (!initialLoadDoneRef.current && customDeploymentId) {
initialLoadDoneRef.current = true;
}
// Only clear chat if deployment ACTUALLY changed by user (after initial load)
if (initialLoadDoneRef.current && previousDeploymentIdRef.current !== customDeploymentId) {
console.log('[Chat] Deployment CHANGED by user!', {
old: previousDeploymentIdRef.current,
new: customDeploymentId,
name: deploymentName,
});
// Clear chat with new initial message (use name instead of ID)
// Clear chat with new initial message
setMessages([createInitialMessage(deploymentName)]);
// Update ref
previousDeploymentIdRef.current = customDeploymentId;
}
}, [customDeploymentId, deploymentName, createInitialMessage]);
@@ -358,6 +372,10 @@
setInput('');
inputRef.current = '';
// This message was sent via text input (keyboard), not voice
// Reset the flag so response won't be spoken
lastMessageWasVoiceRef.current = false;
setMessages(prev => [...prev, userMessage]);
setIsSending(true);
Keyboard.dismiss();
@@ -414,9 +432,11 @@
};
setMessages(prev => [...prev, assistantMessage]);
// Only speak the response if voice session is active (FAB pressed)
// Don't auto-speak for text-only chat messages
if (voiceIsActive) {
// Only speak the response if:
// 1. Voice session is active (FAB pressed) AND
// 2. The user's message was sent via voice (not typed)
// This way, typing a message while voice is active won't trigger TTS
if (voiceIsActive && lastMessageWasVoiceRef.current) {
speak(responseText);
}
} else {

View File

@@ -16,6 +16,7 @@ import { Ionicons } from '@expo/vector-icons';
import { SafeAreaView } from 'react-native-safe-area-context';
import { useAuth } from '@/contexts/AuthContext';
import { useVoice } from '@/contexts/VoiceContext';
import { useChat } from '@/contexts/ChatContext';
import { api } from '@/services/api';
import { AppColors, BorderRadius, FontSizes, Spacing } from '@/constants/theme';
@@ -56,7 +57,8 @@
export default function ProfileScreen() {
const { user, logout } = useAuth();
const { updateVoiceApiType, stopSession, isActive } = useVoice();
const { updateVoiceApiType, stopSession } = useVoice();
const { clearMessages } = useChat();
const [deploymentId, setDeploymentId] = useState<string>('');
const [deploymentName, setDeploymentName] = useState<string>('');
const [showDeploymentModal, setShowDeploymentModal] = useState(false);
@@ -124,6 +126,18 @@
try {
const result = await api.validateDeploymentId(trimmed);
if (result.ok && result.data?.valid) {
// ALWAYS stop voice session when deployment changes
console.log('[Profile] Stopping voice session and clearing chat before deployment change');
stopSession();
// Clear chat history when deployment changes
clearMessages({
id: '1',
role: 'assistant',
content: `Hello! I'm Julia, your AI wellness companion.${result.data.name ? `\n\nI'm here to help you monitor ${result.data.name}.` : ''}\n\nType a message below to chat with me.`,
timestamp: new Date(),
});
await api.setDeploymentId(trimmed);
if (result.data.name) {
await api.setDeploymentName(result.data.name);
@@ -142,25 +156,43 @@
setIsValidating(false);
}
} else {
// ALWAYS stop voice session when deployment is cleared
console.log('[Profile] Stopping voice session and clearing chat before clearing deployment');
stopSession();
// Clear chat history when deployment is cleared
clearMessages({
id: '1',
role: 'assistant',
content: "Hello! I'm Julia, your AI wellness companion.\n\nType a message below to chat with me.",
timestamp: new Date(),
});
await api.clearDeploymentId();
setDeploymentId('');
setDeploymentName('');
setShowDeploymentModal(false);
}
}, [tempDeploymentId]);
}, [tempDeploymentId, stopSession, clearMessages]);
const saveVoiceApiType = useCallback(async () => {
// Stop active voice session if any before changing API type
if (isActive) {
console.log('[Profile] Stopping active voice session before API type change');
// ALWAYS stop voice session when API type changes
console.log('[Profile] Stopping voice session and clearing chat before API type change');
stopSession();
}
// Clear chat history when Voice API changes
clearMessages({
id: '1',
role: 'assistant',
content: "Hello! I'm Julia, your AI wellness companion.\n\nType a message below to chat with me.",
timestamp: new Date(),
});
await api.setVoiceApiType(tempVoiceApiType);
setVoiceApiType(tempVoiceApiType);
updateVoiceApiType(tempVoiceApiType);
setShowVoiceApiModal(false);
}, [tempVoiceApiType, updateVoiceApiType, isActive, stopSession]);
}, [tempVoiceApiType, updateVoiceApiType, stopSession, clearMessages]);
const openTerms = () => {
router.push('/terms');

View File

@@ -264,9 +264,9 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
// Get API token
const token = await getWellNuoToken();
// Check if aborted
if (abortController.signal.aborted || sessionStoppedRef.current) {
console.log('[VoiceContext] Request aborted before API call');
// Check if this request was superseded by a newer one or session stopped
if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
console.log('[VoiceContext] Request aborted before API call (superseded or session stopped)');
return null;
}
@@ -304,9 +304,9 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
const data = await response.json();
// Check if session was stopped while waiting for response
if (sessionStoppedRef.current) {
console.log('[VoiceContext] Session stopped during API call, discarding response');
// Check if request was superseded while waiting for response or session stopped
if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
console.log('[VoiceContext] Request superseded during API call or session stopped, discarding response');
return null;
}
@@ -340,6 +340,12 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
deployment_id: deploymentId,
};
// Check if request was superseded before retry
if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
console.log('[VoiceContext] Request aborted before retry (superseded or session stopped)');
return null;
}
const retryResponse = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
@@ -349,6 +355,12 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
const retryData = await retryResponse.json();
// Check again after retry completes
if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
console.log('[VoiceContext] Request superseded after retry completed');
return null;
}
if (retryData.ok && retryData.response?.body) {
const responseText = retryData.response.body;
console.log('[VoiceContext] Retry succeeded:', responseText.slice(0, 100) + '...');

View File

@@ -0,0 +1,125 @@
# VoiceContext Race Condition Fix - Test Documentation
## Problem Description
`VoiceContext.tsx` had a race condition around its `AbortController`, sketched after the list below:
1. Multiple calls to `sendTranscript()` could create multiple `AbortController` instances
2. The older requests would continue using their local `abortController` variable
3. When checking `abortController.signal.aborted`, it wouldn't detect if the request was superseded by a newer one
4. Responses from older, superseded requests could still be processed and spoken
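
In miniature, the racy flow looks like the following self-contained sketch (hypothetical, simplified names; a 200 ms timer stands in for the real voice API call):

```typescript
// Shared ref, as in VoiceContext: the most recent request's controller lives here.
const abortControllerRef: { current: AbortController | null } = { current: null };

async function sendTranscriptRacy(text: string): Promise<string | null> {
  const abortController = new AbortController();
  abortControllerRef.current = abortController; // a later call silently overwrites this

  // Stand-in for the slow network request.
  const data = await new Promise<{ body: string }>((resolve) =>
    setTimeout(() => resolve({ body: `echo: ${text}` }), 200),
  );

  // BUG: this only notices an explicit abort() (or a stopped session in the real code).
  // It cannot tell that a newer call has since replaced abortControllerRef.current,
  // so the stale response is still returned and would be spoken.
  if (abortController.signal.aborted) {
    return null;
  }
  return data.body;
}
```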
## Fix Applied
Changed the abort checks from:
```typescript
if (abortController.signal.aborted || sessionStoppedRef.current)
```
To:
```typescript
if (abortControllerRef.current !== abortController || sessionStoppedRef.current)
```
This ensures that we check if the current request's AbortController is still the active one in the ref, not just if it's been aborted.
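
A rough sketch of the corrected flow, reusing the simplified setup above (the real implementation also checks `sessionStoppedRef.current` at each of the four points):

```typescript
async function sendTranscriptFixed(text: string): Promise<string | null> {
  abortControllerRef.current?.abort(); // cancel whatever request is currently in flight
  const abortController = new AbortController();
  abortControllerRef.current = abortController;

  // Same stand-in for the slow network request.
  const data = await new Promise<{ body: string }>((resolve) =>
    setTimeout(() => resolve({ body: `echo: ${text}` }), 200),
  );

  // Identity check: a newer call replaces the ref, so a superseded request sees the
  // mismatch even if its own signal was never aborted, and discards its response.
  if (abortControllerRef.current !== abortController) {
    return null;
  }
  return data.body;
}
```

Calling `sendTranscriptFixed` twice in quick succession resolves the first call to `null` and keeps only the second response, which is the behavior the tests below assert.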
## Test Scenarios
### Scenario 1: New Request Supersedes Old Request
**Setup:**
- Send request A (slow - 200ms delay)
- Before A completes, send request B (fast - 50ms delay)
**Expected Behavior:**
- Request A's AbortController is aborted when B starts
- Request A's response is discarded even if it arrives
- Only request B's response is processed and spoken
- `lastResponse` contains only B's response
**Code Locations:**
- Line 268: Check before first API call
- Line 308: Check after first API call completes
- Line 343: Check before retry
- Line 357: Check after retry completes
### Scenario 2: Session Stopped During Request
**Setup:**
- Send a request with 200ms delay
- Stop session after 50ms
**Expected Behavior:**
- Request's AbortController is aborted
- `sessionStoppedRef.current` is set to true
- Response is discarded
- TTS does not speak the response
- Status returns to 'idle'
**Code Locations:**
- Line 500: `sessionStoppedRef.current = true` set first
- Line 503: AbortController aborted
- Line 268, 308, 343, 357: All checks verify session not stopped
### Scenario 3: Retry Scenario with Superseded Request
**Setup:**
- Send request A that returns 401 (triggers retry)
- Before retry completes, send request B
**Expected Behavior:**
- Request A initiates token refresh and retry
- Request B supersedes request A before retry completes
- Request A's retry response is discarded
- Only request B's response is processed
**Code Locations:**
- Line 343: Check before retry request
- Line 357: Check after retry response
## Manual Testing Instructions
Since automated testing has Expo SDK compatibility issues, manual testing is recommended:
### Test 1: Rapid Voice Commands
1. Start voice session
2. Say "How is dad doing?" and immediately say "What's his temperature?"
3. Verify only the second response is spoken
4. Check logs for "Request superseded" messages
### Test 2: Stop During API Call
1. Start voice session
2. Say "How is dad doing?"
3. Immediately press stop button
4. Verify TTS does not speak the API response
5. Verify session returns to idle state
### Test 3: Network Delay Simulation
1. Use Network Link Conditioner to add 2-3 second delay
2. Send multiple voice commands rapidly
3. Verify only the last command's response is processed
4. Check logs for proper abort handling
## Verification Commands
```bash
# Check for race condition related code
grep -n "abortControllerRef.current !== abortController" WellNuoLite/contexts/VoiceContext.tsx
# Expected output:
# 268: if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
# 308: if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
# 343: if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
# 357: if (abortControllerRef.current !== abortController || sessionStoppedRef.current) {
```
## Files Modified
- `WellNuoLite/contexts/VoiceContext.tsx`: Fixed race condition (4 locations)
## Related Issues
This fix prevents:
- Speaking responses from old requests after newer ones
- Processing responses after session is stopped
- Retry responses from superseded requests
- Inconsistent UI state due to out-of-order responses

View File

@@ -0,0 +1,304 @@
/**
* Tests for VoiceContext race condition fix
*
* These tests verify that the AbortController race condition is properly handled:
* - When a new request supersedes an old one, the old request is properly aborted
* - The old request's response is discarded if it arrives after being superseded
* - Session stop properly cancels in-flight requests
*/
// Mock dependencies before imports
jest.mock('../../services/api', () => ({
api: {
getVoiceApiType: jest.fn(),
getDeploymentId: jest.fn(),
setVoiceApiType: jest.fn(),
},
}));
jest.mock('expo-speech', () => ({
speak: jest.fn(),
stop: jest.fn(),
isSpeakingAsync: jest.fn(),
}));
jest.mock('../VoiceTranscriptContext', () => ({
useVoiceTranscript: () => ({
addTranscriptEntry: jest.fn(),
}),
VoiceTranscriptProvider: ({ children }: any) => children,
}));
// Mock fetch
global.fetch = jest.fn();
import { renderHook, act, waitFor } from '@testing-library/react-native';
import { VoiceProvider, useVoice } from '../VoiceContext';
import { api } from '../../services/api';
import * as Speech from 'expo-speech';
import React from 'react';
describe('VoiceContext - AbortController Race Condition', () => {
beforeEach(() => {
jest.clearAllMocks();
(api.getVoiceApiType as jest.Mock).mockResolvedValue('ask_wellnuo_ai');
(api.getDeploymentId as jest.Mock).mockResolvedValue('21');
(Speech.speak as jest.Mock).mockImplementation((text, options) => {
setTimeout(() => options?.onDone?.(), 0);
});
});
const mockApiResponse = (token: string, responseText: string, delay = 0) => {
return new Promise((resolve) => {
setTimeout(() => {
resolve({
json: async () => ({
ok: true,
response: { body: responseText },
}),
});
}, delay);
});
};
const mockTokenResponse = () => ({
json: async () => ({
status: '200 OK',
access_token: 'test-token',
}),
});
it('should abort old request when new request comes in', async () => {
const abortedRequests: AbortSignal[] = [];
(global.fetch as jest.Mock).mockImplementation((url, options) => {
const signal = options?.signal;
if (signal) {
abortedRequests.push(signal);
}
// First call - token request
if (url.includes('function=credentials')) {
return Promise.resolve(mockTokenResponse());
}
// Subsequent calls - API requests
return mockApiResponse('test-token', 'Response', 100);
});
const { result } = renderHook(() => useVoice(), {
wrapper: ({ children }) => <VoiceProvider>{children}</VoiceProvider>,
});
await act(async () => {
result.current.startSession();
});
// Send first request
act(() => {
result.current.sendTranscript('First message');
});
// Wait a bit, then send second request (should abort first)
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 10));
result.current.sendTranscript('Second message');
});
// Wait for requests to complete
await waitFor(() => {
expect(abortedRequests.length).toBeGreaterThan(0);
}, { timeout: 3000 });
// Verify that at least one request signal was aborted
const hasAbortedSignal = abortedRequests.some(signal => signal.aborted);
expect(hasAbortedSignal).toBe(true);
});
it('should discard response from superseded request', async () => {
let requestCount = 0;
const responses = ['First response', 'Second response'];
(global.fetch as jest.Mock).mockImplementation((url, options) => {
// Token request
if (url.includes('function=credentials')) {
return Promise.resolve(mockTokenResponse());
}
// API requests - first one is slower
const currentRequest = requestCount++;
const delay = currentRequest === 0 ? 200 : 50; // First request is slower
return mockApiResponse('test-token', responses[currentRequest], delay);
});
const { result } = renderHook(() => useVoice(), {
wrapper: ({ children }) => <VoiceProvider>{children}</VoiceProvider>,
});
await act(async () => {
result.current.startSession();
});
// Send first request (will be slow)
const firstPromise = act(async () => {
return await result.current.sendTranscript('First message');
});
// Immediately send second request (will be fast and supersede first)
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 10));
await result.current.sendTranscript('Second message');
});
await firstPromise;
// Wait for all promises to settle
await waitFor(() => {
// The lastResponse should be from the second request only
// because the first request's response should be discarded
expect(result.current.lastResponse).toBe('Second response');
}, { timeout: 3000 });
});
it('should abort request when session is stopped', async () => {
let abortedSignal: AbortSignal | null = null;
(global.fetch as jest.Mock).mockImplementation((url, options) => {
const signal = options?.signal;
if (signal && !url.includes('function=credentials')) {
abortedSignal = signal;
}
// Token request
if (url.includes('function=credentials')) {
return Promise.resolve(mockTokenResponse());
}
// API request - slow
return mockApiResponse('test-token', 'Response', 200);
});
const { result } = renderHook(() => useVoice(), {
wrapper: ({ children }) => <VoiceProvider>{children}</VoiceProvider>,
});
await act(async () => {
result.current.startSession();
});
// Send request
act(() => {
result.current.sendTranscript('Test message');
});
// Stop session while request is in flight
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 50));
result.current.stopSession();
});
// Wait a bit for abort to process
await waitFor(() => {
expect(abortedSignal?.aborted).toBe(true);
}, { timeout: 3000 });
// Session should be idle
expect(result.current.status).toBe('idle');
});
it('should not speak response if session stopped during API call', async () => {
(global.fetch as jest.Mock).mockImplementation((url, options) => {
// Token request
if (url.includes('function=credentials')) {
return Promise.resolve(mockTokenResponse());
}
// API request - slow
return mockApiResponse('test-token', 'Response text', 100);
});
const { result } = renderHook(() => useVoice(), {
wrapper: ({ children }) => <VoiceProvider>{children}</VoiceProvider>,
});
await act(async () => {
result.current.startSession();
});
// Send request
const transcriptPromise = act(async () => {
return await result.current.sendTranscript('Test message');
});
// Stop session while API call is in flight
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 30));
result.current.stopSession();
});
await transcriptPromise;
// Wait for any pending operations
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 200));
});
// Speech.speak should not have been called with the response
// (might be called with error message, but not with "Response text")
const speakCalls = (Speech.speak as jest.Mock).mock.calls;
const hasResponseText = speakCalls.some(call => call[0] === 'Response text');
expect(hasResponseText).toBe(false);
});
it('should handle retry with proper abort controller check', async () => {
let requestCount = 0;
(global.fetch as jest.Mock).mockImplementation((url, options) => {
// Token request
if (url.includes('function=credentials')) {
return Promise.resolve(mockTokenResponse());
}
// First API request - return 401 to trigger retry
if (requestCount === 0) {
requestCount++;
return Promise.resolve({
json: async () => ({
status: '401 Unauthorized',
ok: false,
}),
});
}
// Retry request - slow
return mockApiResponse('test-token', 'Retry response', 100);
});
const { result } = renderHook(() => useVoice(), {
wrapper: ({ children }) => <VoiceProvider>{children}</VoiceProvider>,
});
await act(async () => {
result.current.startSession();
});
// Send first request (will trigger retry)
const firstPromise = act(async () => {
return await result.current.sendTranscript('First message');
});
// Send second request during retry (should supersede first)
await act(async () => {
await new Promise(resolve => setTimeout(resolve, 50));
await result.current.sendTranscript('Second message');
});
await firstPromise;
// Wait for operations to complete
await waitFor(() => {
// Should not have "Retry response" because first request was superseded
expect(result.current.lastResponse).not.toBe('Retry response');
}, { timeout: 3000 });
});
});

View File

@@ -30,7 +30,7 @@ let ExpoSpeechRecognitionModule: any = null;
let useSpeechRecognitionEvent: any = () => {}; // no-op by default
try {
const speechRecognition = require('@jamsch/expo-speech-recognition');
const speechRecognition = require('expo-speech-recognition');
ExpoSpeechRecognitionModule = speechRecognition.ExpoSpeechRecognitionModule;
useSpeechRecognitionEvent = speechRecognition.useSpeechRecognitionEvent;
} catch (e) {
@@ -258,10 +258,16 @@
interimResults,
continuous,
addsPunctuation: Platform.OS === 'ios' ? addsPunctuation : undefined,
// Android: use CLOUD recognition for better quality
// On-device models often have worse accuracy
// Setting to false allows the system to use Google's cloud ASR
requiresOnDeviceRecognition: false,
// Android-specific: longer silence timeout for more natural pauses
// CRITICAL FIX: Increased from 2000ms to 4000ms to prevent premature speech cutoff
// This allows users to pause between sentences without being cut off
androidIntentOptions: Platform.OS === 'android' ? {
EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 2000,
EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 1500,
EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS: 4000, // 4 sec silence before final (was 2000)
EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS: 3000, // 3 sec pause detection (was 1500)
} : undefined,
});

jest.config.js (new file, 27 lines)
View File

@@ -0,0 +1,27 @@
module.exports = {
preset: 'jest-expo',
testEnvironment: 'jsdom',
setupFilesAfterEnv: ['<rootDir>/jest.setup.js'],
transformIgnorePatterns: [
'node_modules/(?!((jest-)?react-native|@react-native(-community)?)|expo(nent)?|@expo(nent)?/.*|@expo-google-fonts/.*|react-navigation|@react-navigation/.*|@unimodules/.*|unimodules|sentry-expo|native-base|react-native-svg)',
],
moduleNameMapper: {
'^@/(.*)$': '<rootDir>/$1',
},
collectCoverageFrom: [
'contexts/**/*.{ts,tsx}',
'services/**/*.{ts,tsx}',
'hooks/**/*.{ts,tsx}',
'!**/*.d.ts',
'!**/node_modules/**',
'!**/__tests__/**',
],
testMatch: [
'**/__tests__/**/*.test.{ts,tsx}',
'**/?(*.)+(spec|test).{ts,tsx}',
],
testPathIgnorePatterns: [
'/node_modules/',
'/.ralphy-worktrees/',
],
};

jest.setup.js (new file, 30 lines)
View File

@@ -0,0 +1,30 @@
// Jest setup for React Native Testing Library
// Mock Expo modules
jest.mock('expo-speech', () => ({
speak: jest.fn(),
stop: jest.fn(),
isSpeakingAsync: jest.fn().mockResolvedValue(false),
}));
jest.mock('expo-secure-store', () => ({
getItemAsync: jest.fn(),
setItemAsync: jest.fn(),
deleteItemAsync: jest.fn(),
}));
jest.mock('expo-constants', () => ({
expoConfig: {
extra: {},
},
}));
// Mock console methods during tests to reduce noise
global.console = {
...console,
log: jest.fn(),
debug: jest.fn(),
info: jest.fn(),
warn: jest.fn(),
error: jest.fn(),
};

View File

@@ -312,13 +312,30 @@ class WellNuoLLMStream(llm.LLMStream):
def prewarm(proc: JobProcess):
"""Preload VAD model for faster startup."""
# Increase min_silence_duration to prevent cutting off user speech during barge-in
# Default is 0.55s which is too short - user pauses between words get interpreted as end of speech
# 0.9s gives user more time to continue speaking without being cut off
# CRITICAL FIX: Prevent premature speech cutoff
#
# The VAD (Voice Activity Detection) determines when the user has finished speaking.
# Default settings are too aggressive and cut off speech during natural pauses.
#
# Key parameters:
# - min_silence_duration: How long to wait after silence before ending speech
# Default 0.55s is WAY too short - people pause between sentences/thoughts
# 1.8s allows natural conversation flow without being cut off
#
# - min_speech_duration: Minimum speech length to be considered valid
# Keeping it low (0.1s) allows short responses but filters noise
#
# - activation_threshold: Voice detection sensitivity (0-1)
# Lower = more sensitive to quiet speech, but may pick up background noise
# 0.35 is a good balance for typical indoor environments
#
# - padding_duration: Audio padding around detected speech (default: 0.3)
# Increased to 0.5s to capture soft word endings
proc.userdata["vad"] = silero.VAD.load(
min_silence_duration=0.9, # Wait 0.9s of silence before ending speech (default: 0.55)
min_speech_duration=0.05, # Keep low for quick interruption detection (default: 0.05)
activation_threshold=0.4, # Slightly lower for better sensitivity (default: 0.5)
min_silence_duration=1.8, # Wait 1.8s of silence before ending speech (was 0.9s)
min_speech_duration=0.1, # Minimum valid speech duration (was 0.05s)
activation_threshold=0.35, # Slightly more sensitive for quiet speakers (was 0.4)
padding_duration=0.5, # Extra audio padding around speech (default: 0.3)
)
@@ -408,6 +425,13 @@ async def entrypoint(ctx: JobContext):
# Deepgram for STT - better accuracy and faster than AssemblyAI
# AssemblyAI was giving garbage like "shambhala balashambal" instead of actual speech
#
# CRITICAL FIX: Endpointing settings prevent premature speech cutoff
# - endpointing: Time in ms of silence before finalizing transcript
# Default is ~500ms which is too aggressive for natural speech
# 1500ms (1.5s) allows for thinking pauses without cutting off
# - utterance_end_ms: Additional buffer for utterance detection
# 2000ms gives extra time for slow speakers or complex sentences
session = AgentSession(
# Deepgram Nova-2 model for best STT accuracy
stt=deepgram.STT(
@@ -415,6 +439,8 @@
language="en-US",
smart_format=True, # Better punctuation and formatting
no_delay=True, # Faster response for real-time
endpointing=1500, # Wait 1.5s of silence before finalizing (default: ~500ms)
utterance_end_ms=2000, # Extra 2s buffer for utterance end detection
),
# WellNuo voice_ask API for LLM with dynamic beneficiary data
llm=WellNuoLLM(
@@ -441,8 +467,9 @@
),
)
# Generate initial greeting - simple and direct
await session.say("Hi! I'm Julia, your AI care assistant. How can I help you today?")
# No greeting - user expects to speak immediately after pressing the call button
# (like any messaging app: press mic → start talking)
# Julia will respond only after the user speaks first
if __name__ == "__main__":

package-lock.json (generated, 7273 lines changed)

File diff suppressed because it is too large

View File

@@ -8,30 +8,37 @@
"android": "expo run:android",
"ios": "expo run:ios",
"web": "expo start --web",
"lint": "expo lint"
"lint": "expo lint",
"test": "jest",
"test:watch": "jest --watch",
"test:coverage": "jest --coverage"
},
"dependencies": {
"@dr.pogodin/react-native-fs": "^2.36.2",
"@expo/vector-icons": "^15.0.3",
"@jamsch/expo-speech-recognition": "^0.2.15",
"@notifee/react-native": "^9.1.8",
"@react-native-async-storage/async-storage": "2.2.0",
"@react-navigation/bottom-tabs": "^7.4.0",
"@react-navigation/elements": "^2.6.3",
"@react-navigation/native": "^7.1.8",
"@stripe/stripe-react-native": "^0.58.0",
"expo": "~54.0.29",
"expo-clipboard": "~8.0.8",
"expo-constants": "~18.0.12",
"expo-crypto": "^15.0.8",
"expo-device": "~8.0.10",
"expo-file-system": "~19.0.21",
"expo-font": "~14.0.10",
"expo-haptics": "~15.0.8",
"expo-image": "~3.0.11",
"expo-image-manipulator": "^14.0.8",
"expo-image-picker": "^17.0.10",
"expo-keep-awake": "^15.0.8",
"expo-linking": "~8.0.10",
"expo-router": "~6.0.19",
"expo-secure-store": "^15.0.8",
"expo-speech": "~14.0.6",
"expo-speech-recognition": "^3.1.0",
"expo-splash-screen": "~31.0.12",
"expo-status-bar": "~3.0.9",
"expo-symbols": "~1.0.8",
@@ -51,11 +58,17 @@
"ultravox-react-native": "^0.0.1"
},
"devDependencies": {
"@testing-library/jest-native": "^5.4.3",
"@testing-library/react-native": "^13.3.3",
"@types/jest": "^30.0.0",
"@types/react": "~19.1.0",
"eslint": "^9.25.0",
"eslint-config-expo": "~10.0.0",
"jest": "^30.2.0",
"jest-expo": "^54.0.16",
"playwright": "^1.57.0",
"sharp": "^0.34.5",
"ts-jest": "^29.4.6",
"typescript": "~5.9.2"
},
"private": true