- FAB button now correctly stops session during speaking/processing states
- Echo prevention: STT stopped during TTS playback, results ignored during speaking
- Chat TTS only speaks when voice session is active (no auto-speak for text chat)
- Session stop now aborts in-flight API requests and prevents race conditions
- STT restarts after TTS with 800ms delay for audio focus release
- Pending interrupt transcript processed after TTS completion
- ChatContext added for message persistence across tab navigation
- VoiceFAB redesigned with state-based animations
- console.error replaced with console.warn across voice pipeline
- no-speech STT errors silenced (normal silence behavior)
471 lines · 14 KiB · TypeScript
/**
 * Voice Context - Local STT/TTS integration with WellNuo API
 *
 * Provides voice session management:
 * - STT (Speech-to-Text) via expo-speech-recognition
 * - API calls to WellNuo ask_wellnuo_ai
 * - TTS (Text-to-Speech) via expo-speech
 *
 * Flow: User speaks → STT → API → Response → TTS → Continue listening
 */

import React, {
  createContext,
  useContext,
  useState,
  useCallback,
  useRef,
  ReactNode,
} from 'react';
import * as Speech from 'expo-speech';
import { api } from '@/services/api';
import { useVoiceTranscript } from './VoiceTranscriptContext';

// WellNuo API configuration (same as chat.tsx)
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
const WELLNUO_USER = 'anandk';
const WELLNUO_PASSWORD = 'anandk_8';

// Single deployment mode - sends only deployment_id (no beneficiary_names_dict)
const SINGLE_DEPLOYMENT_MODE = true;

// Keywords for question normalization (same as chat.tsx)
const STATUS_KEYWORDS = [
  /\bhow\s+is\b/i,
  /\bhow'?s\b/i,
  /\bhow\s+are\b/i,
  /\btell\s+me\s+about\b/i,
  /\bwhat'?s\s+up\s+with\b/i,
  /\bupdate\s+on\b/i,
  /\bstatus\b/i,
  /\bdoing\b/i,
  /\bfeeling\b/i,
  /\bcheck\s+on\b/i,
  /\bis\s+\w+\s+okay\b/i,
  /\bis\s+\w+\s+alright\b/i,
  /\bis\s+\w+\s+fine\b/i,
  // No trailing \b after '?': a word boundary can never follow '?' at the end
  // of input or before a space, so /\bokay\?\b/ would almost never match.
  /\bokay\?/i,
  /\balright\?/i,
];

const SUBJECT_KEYWORDS = [
  /\bdad\b/i,
  /\bfather\b/i,
  /\bferdinand\b/i,
  /\bhim\b/i,
  /\bhe\b/i,
  /\bmy\s+dad\b/i,
  /\bmy\s+father\b/i,
  /\bthe\s+patient\b/i,
  /\bloved\s+one\b/i,
  /\bparent\b/i,
  /\bgrandpa\b/i,
  /\bgrandfather\b/i,
];

/**
 * Normalize question for WellNuo API (same logic as chat.tsx)
 */
function normalizeQuestion(userMessage: string): string {
  const msgLower = userMessage.toLowerCase().trim();

  const isStatusQuery = STATUS_KEYWORDS.some((pattern) => pattern.test(msgLower));
  const isAboutRecipient = SUBJECT_KEYWORDS.some((pattern) => pattern.test(msgLower));

  if (isStatusQuery && isAboutRecipient) {
    console.log(`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing'`);
    return 'how is dad doing';
  }

  if (isStatusQuery && !isAboutRecipient) {
    console.log(
      `[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing' (assumed recipient)`
    );
    return 'how is dad doing';
  }

  console.log(`[VoiceContext] No normalization applied to: '${userMessage}'`);
  return userMessage;
}
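
// Illustrative behavior of normalizeQuestion (examples derived from the
// keyword lists above; not an exhaustive specification):
//   "How is dad feeling?"   -> "how is dad doing"   (status + subject match)
//   "Any update on things?" -> "how is dad doing"   (status only; recipient assumed)
//   "Call the nurse"        -> "Call the nurse"     (no status keyword; passed through)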
export type VoiceStatus = 'idle' | 'listening' | 'processing' | 'speaking';

interface VoiceContextValue {
  // Current status of the voice session
  status: VoiceStatus;
  // Whether the voice session is active (not idle)
  isActive: boolean;
  // Whether STT is currently listening
  isListening: boolean;
  // Whether TTS is currently speaking
  isSpeaking: boolean;
  // Whether an API request is in flight
  isProcessing: boolean;
  // Current/last transcript from STT
  transcript: string;
  // Partial transcript (real-time preview)
  partialTranscript: string;
  // Last API response
  lastResponse: string | null;
  // Error message, if any
  error: string | null;

  // Start voice session (begin listening)
  startSession: () => void;
  // Stop voice session
  stopSession: () => void;

  // Send transcript to the API and speak the response via TTS.
  // Called automatically when STT detects end of speech, or manually.
  sendTranscript: (text: string) => Promise<string | null>;

  // Update transcript from the external STT hook
  setTranscript: (text: string) => void;
  setPartialTranscript: (text: string) => void;

  // Set status from external STT/TTS hooks
  setStatus: (status: VoiceStatus) => void;
  setIsListening: (listening: boolean) => void;
  setIsSpeaking: (speaking: boolean) => void;

  // Speak text using TTS
  speak: (text: string) => Promise<void>;
  // Stop TTS
  stopSpeaking: () => void;
  // Interrupt TTS if speaking (call when the user starts talking)
  interruptIfSpeaking: () => boolean;
}

const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);

export function VoiceProvider({ children }: { children: ReactNode }) {
  const [status, setStatus] = useState<VoiceStatus>('idle');
  const [transcript, setTranscript] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [lastResponse, setLastResponse] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);

  // Voice transcript context for chat display
  const { addTranscriptEntry } = useVoiceTranscript();

  // API token cache
  const apiTokenRef = useRef<string | null>(null);

  // Abort controller for cancelling in-flight API requests
  const abortControllerRef = useRef<AbortController | null>(null);

  // Flag to prevent speak() after the session has been stopped
  const sessionStoppedRef = useRef(false);

  // Deployment ID from settings
  const deploymentIdRef = useRef<string | null>(null);

  // Load deployment ID on mount
  React.useEffect(() => {
    const loadDeploymentId = async () => {
      const savedId = await api.getDeploymentId();
      deploymentIdRef.current = savedId;
      console.log('[VoiceContext] Loaded deployment ID:', savedId);
    };
    loadDeploymentId();
  }, []);

  /**
   * Get WellNuo API token (same as chat.tsx). The token is cached in
   * apiTokenRef and cleared on a 401 so the next call re-authenticates.
   */
  const getWellNuoToken = useCallback(async (): Promise<string> => {
    if (apiTokenRef.current) {
      return apiTokenRef.current;
    }

    const nonce = Math.floor(Math.random() * 1000000).toString();
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: new URLSearchParams({
        function: 'credentials',
        clientId: 'MA_001',
        user_name: WELLNUO_USER,
        ps: WELLNUO_PASSWORD,
        nonce: nonce,
      }).toString(),
    });

    const data = await response.json();
    if (data.status === '200 OK' && data.access_token) {
      apiTokenRef.current = data.access_token;
      console.log('[VoiceContext] WellNuo token obtained');
      return data.access_token;
    }
    throw new Error('Failed to authenticate with WellNuo API');
  }, []);

  /**
   * Send transcript to the WellNuo API and speak the response.
   * Note: speak (defined below) is created with an empty dependency array and
   * is therefore stable, so it is deliberately omitted from this callback's
   * dependencies.
   */
  const sendTranscript = useCallback(
    async (text: string): Promise<string | null> => {
      const trimmedText = text.trim();
      if (!trimmedText) {
        console.log('[VoiceContext] Empty transcript, skipping API call');
        return null;
      }

      // Don't send if the session was stopped
      if (sessionStoppedRef.current) {
        console.log('[VoiceContext] Session stopped, skipping API call');
        return null;
      }

      console.log('[VoiceContext] Sending transcript to API:', trimmedText);
      setStatus('processing');
      setError(null);

      // Add the user message to the transcript for chat display
      addTranscriptEntry('user', trimmedText);

      // Create an abort controller for this request, cancelling any previous one
      if (abortControllerRef.current) {
        abortControllerRef.current.abort();
      }
      const abortController = new AbortController();
      abortControllerRef.current = abortController;

      try {
        // Get API token
        const token = await getWellNuoToken();

        // Check if aborted while authenticating
        if (abortController.signal.aborted || sessionStoppedRef.current) {
          console.log('[VoiceContext] Request aborted before API call');
          return null;
        }

        // Normalize question
        const normalizedQuestion = normalizeQuestion(trimmedText);

        // Get deployment ID (falls back to '21' if none is saved)
        const deploymentId = deploymentIdRef.current || '21';

        // Build request params
        const requestParams: Record<string, string> = {
          function: 'ask_wellnuo_ai',
          clientId: 'MA_001',
          user_name: WELLNUO_USER,
          token: token,
          question: normalizedQuestion,
          deployment_id: deploymentId,
        };

        // Only add beneficiary_names_dict if NOT in single deployment mode
        if (!SINGLE_DEPLOYMENT_MODE) {
          // The full app would include the beneficiary names dict here;
          // this build is single-deployment only.
        }

        const response = await fetch(API_URL, {
          method: 'POST',
          headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
          body: new URLSearchParams(requestParams).toString(),
          signal: abortController.signal,
        });

        const data = await response.json();

        // Check if the session was stopped while waiting for the response
        if (sessionStoppedRef.current) {
          console.log('[VoiceContext] Session stopped during API call, discarding response');
          return null;
        }

        if (data.ok && data.response?.body) {
          const responseText = data.response.body;
          console.log('[VoiceContext] API response:', responseText.slice(0, 100) + '...');
          setLastResponse(responseText);

          // Add Julia's response to the transcript for chat display
          addTranscriptEntry('assistant', responseText);

          // Speak the response (skipped internally if the session was stopped)
          await speak(responseText);

          return responseText;
        } else {
          // Token might be expired
          if (data.status === '401 Unauthorized') {
            apiTokenRef.current = null;
            throw new Error('Session expired, please try again');
          }
          throw new Error(data.message || 'Could not get response');
        }
      } catch (err) {
        // Ignore abort errors
        if (err instanceof Error && err.name === 'AbortError') {
          console.log('[VoiceContext] API request aborted');
          return null;
        }
        const errorMsg = err instanceof Error ? err.message : 'Unknown error';
        console.warn('[VoiceContext] API error:', errorMsg);
        setError(errorMsg);
        setStatus('idle');
        return null;
      }
    },
    [getWellNuoToken, addTranscriptEntry]
  );
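
  // Expected happy-path response shape, inferred from the checks above (an
  // assumption based on this file, not a documented API contract):
  //   { ok: true, response: { body: '<answer text>' } }
  // Failures surface either as ok !== true with an optional `message`, or as
  // status === '401 Unauthorized' when the cached token has expired.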

  /**
   * Interrupt TTS when user starts speaking
   * Call this from the STT hook when voice activity is detected
   */
  const interruptIfSpeaking = useCallback(() => {
    if (isSpeaking) {
      console.log('[VoiceContext] User interrupted - stopping TTS');
      Speech.stop();
      setIsSpeaking(false);
      setStatus('listening');
      return true;
    }
    return false;
  }, [isSpeaking]);
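
  // Illustrative caller (this lives in the STT hook, not in this file; the
  // event name is hypothetical):
  //   onSpeechStart: () => {
  //     if (interruptIfSpeaking()) {
  //       // User barged in over TTS; status is now 'listening' and the new
  //       // utterance is captured as usual.
  //     }
  //   }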

  /**
   * Speak text using TTS
   */
  const speak = useCallback(async (text: string): Promise<void> => {
    if (!text.trim()) return;

    // Don't speak if session was stopped
    if (sessionStoppedRef.current) {
      console.log('[VoiceContext] Session stopped, skipping TTS');
      return;
    }

    console.log('[VoiceContext] Speaking:', text.slice(0, 50) + '...');
    setStatus('speaking');
    setIsSpeaking(true);

    return new Promise((resolve) => {
      Speech.speak(text, {
        language: 'en-US',
        rate: 0.9,
        pitch: 1.0,
        onStart: () => {
          console.log('[VoiceContext] TTS started');
        },
        onDone: () => {
          console.log('[VoiceContext] TTS completed');
          setIsSpeaking(false);
          // Return to listening state after speaking (if session wasn't stopped)
          if (!sessionStoppedRef.current) {
            setStatus('listening');
          }
          resolve();
        },
        onError: (error) => {
          console.warn('[VoiceContext] TTS error:', error);
          setIsSpeaking(false);
          if (!sessionStoppedRef.current) {
            setStatus('listening');
          }
          resolve();
        },
        onStopped: () => {
          console.log('[VoiceContext] TTS stopped (interrupted)');
          setIsSpeaking(false);
          // Don't set status to listening if session was stopped by user
          if (!sessionStoppedRef.current) {
            setStatus('listening');
          }
          resolve();
        },
      });
    });
  }, []);
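
  // Per the changelog, the STT hook (not this file) restarts recognition
  // roughly 800 ms after TTS finishes so the OS can release audio focus.
  // A minimal sketch of that consumer pattern (restartRecognition is
  // hypothetical):
  //   await speak(responseText);
  //   setTimeout(() => restartRecognition(), 800);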

  /**
   * Stop TTS playback
   */
  const stopSpeaking = useCallback(() => {
    Speech.stop();
    setIsSpeaking(false);
  }, []);

  /**
   * Start voice session
   */
  const startSession = useCallback(() => {
    console.log('[VoiceContext] Starting voice session');
    sessionStoppedRef.current = false;
    setStatus('listening');
    setIsListening(true);
    setError(null);
    setTranscript('');
    setPartialTranscript('');
  }, []);

  /**
   * Stop voice session
   */
  const stopSession = useCallback(() => {
    console.log('[VoiceContext] Stopping voice session');
    // Mark session as stopped FIRST to prevent any pending callbacks
    sessionStoppedRef.current = true;
    // Abort any in-flight API requests
    if (abortControllerRef.current) {
      abortControllerRef.current.abort();
      abortControllerRef.current = null;
    }
    // Stop TTS
    Speech.stop();
    // Reset all state
    setStatus('idle');
    setIsListening(false);
    setIsSpeaking(false);
    setError(null);
  }, []);

  // Computed values
  const isActive = status !== 'idle';
  const isProcessing = status === 'processing';

  return (
    <VoiceContext.Provider
      value={{
        status,
        isActive,
        isListening,
        isSpeaking,
        isProcessing,
        transcript,
        partialTranscript,
        lastResponse,
        error,
        startSession,
        stopSession,
        sendTranscript,
        setTranscript,
        setPartialTranscript,
        setStatus,
        setIsListening,
        setIsSpeaking,
        speak,
        stopSpeaking,
        interruptIfSpeaking,
      }}
    >
      {children}
    </VoiceContext.Provider>
  );
}

export function useVoice() {
  const context = useContext(VoiceContext);
  if (!context) {
    throw new Error('useVoice must be used within VoiceProvider');
  }
  return context;
}
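
// Illustrative consumer (a hypothetical component, not part of this module;
// the import path is assumed):
//
//   import { useVoice } from '@/contexts/VoiceContext';
//
//   function VoiceButton() {
//     const { status, isActive, startSession, stopSession } = useVoice();
//     return (
//       <Pressable onPress={isActive ? stopSession : startSession}>
//         <Text>{status === 'idle' ? 'Start voice' : `Stop (${status})`}</Text>
//       </Pressable>
//     );
//   }
//
// The actual STT wiring (expo-speech-recognition) lives in a separate hook
// that feeds setTranscript/sendTranscript and calls interruptIfSpeaking on
// voice activity, per the comments above.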