wellnua-lite/contexts/VoiceContext.tsx
Sergei 8c0e36cae3 Fix Android voice bugs - STT restart and token retry
Critical Android fixes:

BUG 1 - STT not restarting after TTS:
- Problem: the 300ms isSpeaking delay (added as an iOS visual nicety) also ran on Android and blocked STT
- Android audio focus conflict: STT cannot start while isSpeaking=true
- Fix: platform-specific isSpeaking timing (sketch below)
  - iOS: 300ms delay (smooth visual indicator)
  - Android: cleared immediately (allows STT to restart)
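
A minimal sketch of the fix, mirroring the onDone handler in speak() below:

  if (Platform.OS === 'ios') {
    setTimeout(() => setIsSpeaking(false), 300); // match the iOS STT restart delay
  } else {
    setIsSpeaking(false); // release immediately so Android STT can reclaim audio focus
  }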

BUG 2 - Session expired loop:
- Problem: 401 error → token reset → no retry → user hears error
- Fix: Automatic token refresh and retry on 401
- Flow: 401 → clear token → get new token → retry request (sketch below)
- User never hears "Session expired" unless retry also fails
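
Simplified sketch of the retry path (the full version lives in sendTranscript below):

  if (data.status === '401 Unauthorized') {
    apiTokenRef.current = null;               // drop the stale token
    const newToken = await getWellNuoToken(); // authenticate again
    // re-issue the identical request with newToken; the user only hears
    // an error if this retry also fails
  }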

contexts/VoiceContext.tsx:12-23,387-360
2026-01-28 20:43:42 -08:00

/**
* Voice Context - Local STT/TTS integration with WellNuo API
*
* Provides voice session management:
* - STT (Speech-to-Text) via expo-speech-recognition
* - API calls to WellNuo ask_wellnuo_ai
* - TTS (Text-to-Speech) via expo-speech
*
* Flow: User speaks → STT → API → Response → TTS → Continue listening
*/
import React, {
createContext,
useContext,
useState,
useCallback,
useRef,
ReactNode,
} from 'react';
import { Platform } from 'react-native';
import * as Speech from 'expo-speech';
import { api } from '@/services/api';
import { useVoiceTranscript } from './VoiceTranscriptContext';
// WellNuo API configuration (same as chat.tsx)
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
const WELLNUO_USER = 'anandk';
const WELLNUO_PASSWORD = 'anandk_8';
// Single deployment mode - sends only deployment_id (no beneficiary_names_dict)
const SINGLE_DEPLOYMENT_MODE = true;
// Keywords for question normalization (same as chat.tsx)
const STATUS_KEYWORDS = [
/\bhow\s+is\b/i,
/\bhow'?s\b/i,
/\bhow\s+are\b/i,
/\btell\s+me\s+about\b/i,
/\bwhat'?s\s+up\s+with\b/i,
/\bupdate\s+on\b/i,
/\bstatus\b/i,
/\bdoing\b/i,
/\bfeeling\b/i,
/\bcheck\s+on\b/i,
/\bis\s+\w+\s+okay\b/i,
/\bis\s+\w+\s+alright\b/i,
/\bis\s+\w+\s+fine\b/i,
/\bokay\?\b/i,
/\balright\?\b/i,
];
const SUBJECT_KEYWORDS = [
/\bdad\b/i,
/\bfather\b/i,
/\bferdinand\b/i,
/\bhim\b/i,
/\bhe\b/i,
/\bmy\s+dad\b/i,
/\bmy\s+father\b/i,
/\bthe\s+patient\b/i,
/\bloved\s+one\b/i,
/\bparent\b/i,
/\bgrandpa\b/i,
/\bgrandfather\b/i,
];
/**
* Normalize question for WellNuo API (same logic as chat.tsx)
*/
function normalizeQuestion(userMessage: string): string {
const msgLower = userMessage.toLowerCase().trim();
const isStatusQuery = STATUS_KEYWORDS.some((pattern) => pattern.test(msgLower));
const isAboutRecipient = SUBJECT_KEYWORDS.some((pattern) => pattern.test(msgLower));
if (isStatusQuery && isAboutRecipient) {
console.log(`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing'`);
return 'how is dad doing';
}
if (isStatusQuery && !isAboutRecipient) {
console.log(
`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing' (assumed recipient)`
);
return 'how is dad doing';
}
console.log(`[VoiceContext] No normalization applied to: '${userMessage}'`);
return userMessage;
}
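// Example: normalizeQuestion("How's my dad doing today?") returns 'how is dad doing'
// ("how's" matches STATUS_KEYWORDS, "dad" matches SUBJECT_KEYWORDS), while
// normalizeQuestion("turn off the lights") matches neither and is returned unchanged.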
export type VoiceStatus = 'idle' | 'listening' | 'processing' | 'speaking';
interface VoiceContextValue {
// Current status of the voice session
status: VoiceStatus;
// Whether voice session is active (not idle)
isActive: boolean;
// Whether STT is currently listening
isListening: boolean;
// Whether TTS is currently speaking
isSpeaking: boolean;
// Whether processing API request
isProcessing: boolean;
// Current/last transcript from STT
transcript: string;
// Partial transcript (real-time preview)
partialTranscript: string;
// Last API response
lastResponse: string | null;
// Error message if any
error: string | null;
// Start voice session (begin listening)
startSession: () => void;
// Stop voice session
stopSession: () => void;
// Send transcript to API and get response with TTS
// Called automatically when STT detects speech end, or manually
sendTranscript: (text: string) => Promise<string | null>;
// Update transcript from external STT hook
setTranscript: (text: string) => void;
setPartialTranscript: (text: string) => void;
// Set status from external STT/TTS hooks
setStatus: (status: VoiceStatus) => void;
setIsListening: (listening: boolean) => void;
setIsSpeaking: (speaking: boolean) => void;
// Speak text using TTS
speak: (text: string) => Promise<void>;
// Stop TTS
stopSpeaking: () => void;
// Interrupt TTS if speaking (call when user starts talking)
interruptIfSpeaking: () => boolean;
// Voice API configuration
voiceApiType: 'voice_ask' | 'ask_wellnuo_ai';
updateVoiceApiType: (type: 'voice_ask' | 'ask_wellnuo_ai') => void;
}
const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
export function VoiceProvider({ children }: { children: ReactNode }) {
const [status, setStatus] = useState<VoiceStatus>('idle');
const [transcript, setTranscript] = useState('');
const [partialTranscript, setPartialTranscript] = useState('');
const [lastResponse, setLastResponse] = useState<string | null>(null);
const [error, setError] = useState<string | null>(null);
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
// Voice transcript context for chat display
const { addTranscriptEntry } = useVoiceTranscript();
// API token cache
const apiTokenRef = useRef<string | null>(null);
// Abort controller for cancelling in-flight API requests
const abortControllerRef = useRef<AbortController | null>(null);
// Flag to prevent speak() after session stopped
const sessionStoppedRef = useRef(false);
// Deployment ID from settings
const deploymentIdRef = useRef<string | null>(null);
// Voice API type (voice_ask or ask_wellnuo_ai)
const [voiceApiType, setVoiceApiType] = useState<'voice_ask' | 'ask_wellnuo_ai'>('ask_wellnuo_ai');
// Load voice API type on mount
React.useEffect(() => {
const loadVoiceApiType = async () => {
const savedType = await api.getVoiceApiType();
setVoiceApiType(savedType);
console.log('[VoiceContext] Loaded voice API type:', savedType);
};
loadVoiceApiType();
}, []);
// Load deployment ID on mount
React.useEffect(() => {
const loadDeploymentId = async () => {
const savedId = await api.getDeploymentId();
deploymentIdRef.current = savedId;
console.log('[VoiceContext] Loaded deployment ID:', savedId);
};
loadDeploymentId();
}, []);
/**
* Update voice API type (voice_ask or ask_wellnuo_ai)
*/
const updateVoiceApiType = useCallback(async (type: 'voice_ask' | 'ask_wellnuo_ai') => {
console.log('[VoiceContext] Updating voice API type to:', type);
setVoiceApiType(type);
await api.setVoiceApiType(type);
}, []);
/**
* Get WellNuo API token (same as chat.tsx)
*/
const getWellNuoToken = useCallback(async (): Promise<string> => {
if (apiTokenRef.current) {
return apiTokenRef.current;
}
const nonce = Math.floor(Math.random() * 1000000).toString();
const response = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams({
function: 'credentials',
clientId: 'MA_001',
user_name: WELLNUO_USER,
ps: WELLNUO_PASSWORD,
nonce: nonce,
}).toString(),
});
const data = await response.json();
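// Expected success payload, inferred from the check below:
//   { "status": "200 OK", "access_token": "..." }
// Any other shape falls through to the thrown Error.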
if (data.status === '200 OK' && data.access_token) {
apiTokenRef.current = data.access_token;
console.log('[VoiceContext] WellNuo token obtained');
return data.access_token;
}
throw new Error('Failed to authenticate with WellNuo API');
}, []);
/**
* Send transcript to WellNuo API and speak the response
*/
const sendTranscript = useCallback(
async (text: string): Promise<string | null> => {
const trimmedText = text.trim();
if (!trimmedText) {
console.log('[VoiceContext] Empty transcript, skipping API call');
return null;
}
// Don't send if session was stopped
if (sessionStoppedRef.current) {
console.log('[VoiceContext] Session stopped, skipping API call');
return null;
}
console.log(`[VoiceContext] Sending transcript to API (${voiceApiType}):`, trimmedText);
setStatus('processing');
setError(null);
// Add user message to transcript for chat display
addTranscriptEntry('user', trimmedText);
// Create abort controller for this request
if (abortControllerRef.current) {
abortControllerRef.current.abort();
}
const abortController = new AbortController();
abortControllerRef.current = abortController;
try {
// Get API token
const token = await getWellNuoToken();
// Check if aborted
if (abortController.signal.aborted || sessionStoppedRef.current) {
console.log('[VoiceContext] Request aborted before API call');
return null;
}
// Normalize question
const normalizedQuestion = normalizeQuestion(trimmedText);
// Get deployment ID
const deploymentId = deploymentIdRef.current || '21';
// Log which API type we're using
console.log('[VoiceContext] Using API type:', voiceApiType);
// Build request params
const requestParams: Record<string, string> = {
function: voiceApiType, // Use the selected voiceApiType
clientId: 'MA_001',
user_name: WELLNUO_USER,
token: token,
question: normalizedQuestion,
deployment_id: deploymentId,
};
// Only add beneficiary_names_dict if NOT in single deployment mode
if (!SINGLE_DEPLOYMENT_MODE) {
// The full app would add a beneficiary_names_dict param to requestParams
// here; this build is single-deployment only, so the branch is intentionally empty
}
const response = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams(requestParams).toString(),
signal: abortController.signal,
});
const data = await response.json();
// Check if session was stopped while waiting for response
if (sessionStoppedRef.current) {
console.log('[VoiceContext] Session stopped during API call, discarding response');
return null;
}
if (data.ok && data.response?.body) {
const responseText = data.response.body;
console.log('[VoiceContext] API response:', responseText.slice(0, 100) + '...');
setLastResponse(responseText);
// Add Julia's response to transcript for chat display
addTranscriptEntry('assistant', responseText);
// Speak the response (will be skipped if session stopped)
await speak(responseText);
return responseText;
} else {
// Token might be expired - retry with new token
if (data.status === '401 Unauthorized') {
console.log('[VoiceContext] Token expired, retrying with new token...');
apiTokenRef.current = null;
// Get new token and retry request
const newToken = await getWellNuoToken();
// Rebuild the same request, swapping in the fresh token
const retryRequestParams: Record<string, string> = {
...requestParams,
token: newToken,
};
const retryResponse = await fetch(API_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
body: new URLSearchParams(retryRequestParams).toString(),
signal: abortController.signal,
});
const retryData = await retryResponse.json();
if (retryData.ok && retryData.response?.body) {
const responseText = retryData.response.body;
console.log('[VoiceContext] Retry succeeded:', responseText.slice(0, 100) + '...');
setLastResponse(responseText);
addTranscriptEntry('assistant', responseText);
await speak(responseText);
return responseText;
} else {
throw new Error(retryData.message || 'Could not get response after retry');
}
}
throw new Error(data.message || 'Could not get response');
}
} catch (err) {
// Ignore abort errors
if (err instanceof Error && err.name === 'AbortError') {
console.log('[VoiceContext] API request aborted');
return null;
}
// Handle API errors gracefully with voice feedback
const errorMsg = err instanceof Error ? err.message : 'Unknown error';
console.warn('[VoiceContext] API error:', errorMsg);
// Create user-friendly error message for TTS
const spokenError = `Sorry, I encountered an error: ${errorMsg}. Please try again.`;
// Add error to transcript for chat display
addTranscriptEntry('assistant', spokenError);
// Speak the error message
await speak(spokenError);
// Don't set status to idle: speak() already returns status to 'listening'
// in its onDone handler, which keeps the voice session active
setError(errorMsg);
return null;
}
},
// Note: speak (declared below) has a stable identity ([] deps) and is
// intentionally left out of this list; naming it here would read the
// const before its initialization during render
[getWellNuoToken, addTranscriptEntry, voiceApiType]
);
/**
* Interrupt TTS when user starts speaking
* Call this from the STT hook when voice activity is detected
*/
const interruptIfSpeaking = useCallback(() => {
if (isSpeaking) {
console.log('[VoiceContext] User interrupted - stopping TTS');
Speech.stop();
setIsSpeaking(false);
setStatus('listening');
return true;
}
return false;
}, [isSpeaking]);
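// Usage sketch (hypothetical wiring; the hook call below is an assumption
// about the STT integration, not defined in this file):
//   useSpeechRecognitionEvent('start', () => { interruptIfSpeaking(); });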
/**
* Speak text using TTS
*/
const speak = useCallback(async (text: string): Promise<void> => {
if (!text.trim()) return;
// Don't speak if session was stopped
if (sessionStoppedRef.current) {
console.log('[VoiceContext] Session stopped, skipping TTS');
return;
}
console.log('[VoiceContext] Speaking:', text.slice(0, 50) + '...');
setStatus('speaking');
setIsSpeaking(true);
return new Promise((resolve) => {
Speech.speak(text, {
language: 'en-US',
rate: 0.9,
pitch: 1.0,
onStart: () => {
console.log('[VoiceContext] TTS started');
},
onDone: () => {
console.log('[VoiceContext] TTS completed');
// On iOS: Delay turning off green indicator to match STT restart delay (300ms)
// On Android: Turn off immediately (audio focus conflict with STT)
if (Platform.OS === 'ios') {
setTimeout(() => {
setIsSpeaking(false);
}, 300);
} else {
setIsSpeaking(false);
}
// Return to listening state after speaking (if session wasn't stopped)
if (!sessionStoppedRef.current) {
setStatus('listening');
}
resolve();
},
onError: (error) => {
console.warn('[VoiceContext] TTS error:', error);
// On error, turn off indicator immediately (no delay)
setIsSpeaking(false);
if (!sessionStoppedRef.current) {
setStatus('listening');
}
resolve();
},
onStopped: () => {
console.log('[VoiceContext] TTS stopped (interrupted)');
// When interrupted by user, turn off indicator immediately
setIsSpeaking(false);
// Don't set status to listening if session was stopped by user
if (!sessionStoppedRef.current) {
setStatus('listening');
}
resolve();
},
});
});
}, []);
/**
* Stop TTS playback
*/
const stopSpeaking = useCallback(() => {
Speech.stop();
setIsSpeaking(false);
}, []);
/**
* Start voice session
*/
const startSession = useCallback(() => {
console.log('[VoiceContext] Starting voice session');
sessionStoppedRef.current = false;
setStatus('listening');
setIsListening(true);
setError(null);
setTranscript('');
setPartialTranscript('');
}, []);
/**
* Stop voice session
*/
const stopSession = useCallback(() => {
console.log('[VoiceContext] Stopping voice session');
// Mark session as stopped FIRST to prevent any pending callbacks
sessionStoppedRef.current = true;
// Abort any in-flight API requests
if (abortControllerRef.current) {
abortControllerRef.current.abort();
abortControllerRef.current = null;
}
// Stop TTS
Speech.stop();
// Reset all state
setStatus('idle');
setIsListening(false);
setIsSpeaking(false);
setError(null);
}, []);
// Computed values
const isActive = status !== 'idle';
const isProcessing = status === 'processing';
return (
<VoiceContext.Provider
value={{
status,
isActive,
isListening,
isSpeaking,
isProcessing,
transcript,
partialTranscript,
lastResponse,
error,
startSession,
stopSession,
sendTranscript,
setTranscript,
setPartialTranscript,
setStatus,
setIsListening,
setIsSpeaking,
speak,
stopSpeaking,
interruptIfSpeaking,
voiceApiType,
updateVoiceApiType,
}}
>
{children}
</VoiceContext.Provider>
);
}
export function useVoice() {
const context = useContext(VoiceContext);
if (!context) {
throw new Error('useVoice must be used within VoiceProvider');
}
return context;
}
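
/**
 * Usage sketch (hypothetical consumer; VoiceButton and its wiring are
 * illustrative, not part of this module):
 *
 *   import { Button } from 'react-native';
 *
 *   function VoiceButton() {
 *     const { isActive, startSession, stopSession } = useVoice();
 *     return (
 *       <Button
 *         title={isActive ? 'Stop' : 'Talk to Julia'}
 *         onPress={isActive ? stopSession : startSession}
 *       />
 *     );
 *   }
 */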