wellnua-lite/contexts/VoiceContext.tsx
Sergei f4a239ff43 Improve TTS voice quality - faster rate, higher pitch, iOS premium voice
Changes to contexts/VoiceContext.tsx:
- Increase rate from 0.9 to 1.1 (faster, more natural)
- Increase pitch from 1.0 to 1.15 (slightly higher, less robotic)
- Add iOS premium voice (Samantha - Siri quality)
- Android continues to use default high-quality voice

This fixes the complaint that the voice sounded "backward/outdated" ("отсталый")
and "harsh/stiff" ("жёсткий") on iOS.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-29 09:46:38 -08:00

/**
 * Voice Context - Local STT/TTS integration with WellNuo API
 *
 * Provides voice session management:
 * - STT (Speech-to-Text) via expo-speech-recognition
 * - API calls to WellNuo ask_wellnuo_ai
 * - TTS (Text-to-Speech) via expo-speech
 *
 * Flow: User speaks → STT → API → Response → TTS → Continue listening
 */
import React, {
  createContext,
  useContext,
  useState,
  useCallback,
  useRef,
  ReactNode,
} from 'react';
import { Platform } from 'react-native';
import * as Speech from 'expo-speech';
import { api } from '@/services/api';
import { useVoiceTranscript } from './VoiceTranscriptContext';
// WellNuo API configuration (same as chat.tsx)
const API_URL = 'https://eluxnetworks.net/function/well-api/api';
const WELLNUO_USER = 'anandk';
const WELLNUO_PASSWORD = 'anandk_8';
// Single deployment mode - sends only deployment_id (no beneficiary_names_dict)
const SINGLE_DEPLOYMENT_MODE = true;
// Keywords for question normalization (same as chat.tsx)
const STATUS_KEYWORDS = [
  /\bhow\s+is\b/i,
  /\bhow'?s\b/i,
  /\bhow\s+are\b/i,
  /\btell\s+me\s+about\b/i,
  /\bwhat'?s\s+up\s+with\b/i,
  /\bupdate\s+on\b/i,
  /\bstatus\b/i,
  /\bdoing\b/i,
  /\bfeeling\b/i,
  /\bcheck\s+on\b/i,
  /\bis\s+\w+\s+okay\b/i,
  /\bis\s+\w+\s+alright\b/i,
  /\bis\s+\w+\s+fine\b/i,
  /\bokay\?\b/i,
  /\balright\?\b/i,
];
const SUBJECT_KEYWORDS = [
  /\bdad\b/i,
  /\bfather\b/i,
  /\bferdinand\b/i,
  /\bhim\b/i,
  /\bhe\b/i,
  /\bmy\s+dad\b/i,
  /\bmy\s+father\b/i,
  /\bthe\s+patient\b/i,
  /\bloved\s+one\b/i,
  /\bparent\b/i,
  /\bgrandpa\b/i,
  /\bgrandfather\b/i,
];
/**
 * Normalize question for WellNuo API (same logic as chat.tsx)
 */
function normalizeQuestion(userMessage: string): string {
  const msgLower = userMessage.toLowerCase().trim();
  const isStatusQuery = STATUS_KEYWORDS.some((pattern) => pattern.test(msgLower));
  const isAboutRecipient = SUBJECT_KEYWORDS.some((pattern) => pattern.test(msgLower));
  if (isStatusQuery && isAboutRecipient) {
    console.log(`[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing'`);
    return 'how is dad doing';
  }
  if (isStatusQuery && !isAboutRecipient) {
    console.log(
      `[VoiceContext] Normalized '${userMessage}' -> 'how is dad doing' (assumed recipient)`
    );
    return 'how is dad doing';
  }
  console.log(`[VoiceContext] No normalization applied to: '${userMessage}'`);
  return userMessage;
}
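// Illustrative behavior of normalizeQuestion (example inputs are hypothetical,
// but each outcome follows directly from the regex lists above):
//   "How's my dad feeling today?"   -> "how is dad doing"  (status + subject match)
//   "Any update on the sleep data?" -> "how is dad doing"  (status match, recipient assumed)
//   "Turn on the lights"            -> unchanged           (no status keyword)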
export type VoiceStatus = 'idle' | 'listening' | 'processing' | 'speaking';
interface VoiceContextValue {
  // Current status of the voice session
  status: VoiceStatus;
  // Whether voice session is active (not idle)
  isActive: boolean;
  // Whether STT is currently listening
  isListening: boolean;
  // Whether TTS is currently speaking
  isSpeaking: boolean;
  // Whether processing API request
  isProcessing: boolean;
  // Current/last transcript from STT
  transcript: string;
  // Partial transcript (real-time preview)
  partialTranscript: string;
  // Last API response
  lastResponse: string | null;
  // Error message if any
  error: string | null;
  // Start voice session (begin listening)
  startSession: () => void;
  // Stop voice session
  stopSession: () => void;
  // Send transcript to API and get response with TTS
  // Called automatically when STT detects speech end, or manually
  sendTranscript: (text: string) => Promise<string | null>;
  // Update transcript from external STT hook
  setTranscript: (text: string) => void;
  setPartialTranscript: (text: string) => void;
  // Set status from external STT/TTS hooks
  setStatus: (status: VoiceStatus) => void;
  setIsListening: (listening: boolean) => void;
  setIsSpeaking: (speaking: boolean) => void;
  // Speak text using TTS
  speak: (text: string) => Promise<void>;
  // Stop TTS
  stopSpeaking: () => void;
  // Interrupt TTS if speaking (call when user starts talking)
  interruptIfSpeaking: () => boolean;
  // Voice API configuration
  voiceApiType: 'voice_ask' | 'ask_wellnuo_ai';
  updateVoiceApiType: (type: 'voice_ask' | 'ask_wellnuo_ai') => void;
}
const VoiceContext = createContext<VoiceContextValue | undefined>(undefined);
export function VoiceProvider({ children }: { children: ReactNode }) {
  const [status, setStatus] = useState<VoiceStatus>('idle');
  const [transcript, setTranscript] = useState('');
  const [partialTranscript, setPartialTranscript] = useState('');
  const [lastResponse, setLastResponse] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);
  // Voice transcript context for chat display
  const { addTranscriptEntry } = useVoiceTranscript();
  // API token cache
  const apiTokenRef = useRef<string | null>(null);
  // Abort controller for cancelling in-flight API requests
  const abortControllerRef = useRef<AbortController | null>(null);
  // Flag to prevent speak() after session stopped
  const sessionStoppedRef = useRef(false);
  // Deployment ID from settings
  const deploymentIdRef = useRef<string | null>(null);
  // Voice API type (voice_ask or ask_wellnuo_ai)
  const [voiceApiType, setVoiceApiType] = useState<'voice_ask' | 'ask_wellnuo_ai'>('ask_wellnuo_ai');
  // Load voice API type on mount
  React.useEffect(() => {
    const loadVoiceApiType = async () => {
      const savedType = await api.getVoiceApiType();
      setVoiceApiType(savedType);
      console.log('[VoiceContext] Loaded voice API type:', savedType);
    };
    loadVoiceApiType();
  }, []);
  // Load deployment ID on mount
  React.useEffect(() => {
    const loadDeploymentId = async () => {
      const savedId = await api.getDeploymentId();
      deploymentIdRef.current = savedId;
      console.log('[VoiceContext] Loaded deployment ID:', savedId);
    };
    loadDeploymentId();
  }, []);
  /**
   * Update voice API type (voice_ask or ask_wellnuo_ai)
   */
  const updateVoiceApiType = useCallback(async (type: 'voice_ask' | 'ask_wellnuo_ai') => {
    console.log('[VoiceContext] Updating voice API type to:', type);
    setVoiceApiType(type);
    await api.setVoiceApiType(type);
  }, []);
  /**
   * Get WellNuo API token (same as chat.tsx)
   */
  const getWellNuoToken = useCallback(async (): Promise<string> => {
    if (apiTokenRef.current) {
      return apiTokenRef.current;
    }
    const nonce = Math.floor(Math.random() * 1000000).toString();
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      body: new URLSearchParams({
        function: 'credentials',
        clientId: 'MA_001',
        user_name: WELLNUO_USER,
        ps: WELLNUO_PASSWORD,
        nonce: nonce,
      }).toString(),
    });
    const data = await response.json();
    if (data.status === '200 OK' && data.access_token) {
      apiTokenRef.current = data.access_token;
      console.log('[VoiceContext] WellNuo token obtained');
      return data.access_token;
    }
    throw new Error('Failed to authenticate with WellNuo API');
  }, []);
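  // Expected credentials response shape, inferred from the check above (the
  // real payload may carry more fields; only these two are relied upon):
  //   { "status": "200 OK", "access_token": "<opaque token string>" }
  // Any other payload falls through to the thrown authentication error.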
  /**
   * Send transcript to WellNuo API and speak the response
   */
  const sendTranscript = useCallback(
    async (text: string): Promise<string | null> => {
      const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
      const trimmedText = text.trim();
      if (!trimmedText) {
        console.log(`${platformPrefix} [VoiceContext] Empty transcript, skipping API call`);
        return null;
      }
      // Don't send if session was stopped
      if (sessionStoppedRef.current) {
        console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, skipping API call`);
        return null;
      }
      console.log(`${platformPrefix} [VoiceContext] 📤 Sending transcript to API (${voiceApiType}): "${trimmedText}"`);
      setStatus('processing');
      setError(null);
      // Add user message to transcript for chat display
      addTranscriptEntry('user', trimmedText);
      // Create abort controller for this request
      if (abortControllerRef.current) {
        abortControllerRef.current.abort();
      }
      const abortController = new AbortController();
      abortControllerRef.current = abortController;
      try {
        // Get API token
        console.log(`${platformPrefix} [VoiceContext] 🔑 Getting API token...`);
        const token = await getWellNuoToken();
        console.log(`${platformPrefix} [VoiceContext] ✅ Token obtained`);
        // Check if aborted
        if (abortController.signal.aborted || sessionStoppedRef.current) {
          console.log(`${platformPrefix} [VoiceContext] ⚠️ Request aborted before API call`);
          return null;
        }
        // Normalize question
        const normalizedQuestion = normalizeQuestion(trimmedText);
        console.log(`${platformPrefix} [VoiceContext] 📝 Normalized question: "${normalizedQuestion}"`);
        // Get deployment ID
        const deploymentId = deploymentIdRef.current || '21';
        // Log which API type we're using
        console.log(`${platformPrefix} [VoiceContext] 📡 Using API type: ${voiceApiType}, deployment: ${deploymentId}`);
        // Build request params
        const requestParams: Record<string, string> = {
          function: voiceApiType, // Use the selected voiceApiType
          clientId: 'MA_001',
          user_name: WELLNUO_USER,
          token: token,
          question: normalizedQuestion,
          deployment_id: deploymentId,
        };
        // Only add beneficiary_names_dict if NOT in single deployment mode
        if (!SINGLE_DEPLOYMENT_MODE) {
          // For the full app, this would include a beneficiary names dict.
          // Currently single deployment mode only.
        }
        console.log(`${platformPrefix} [VoiceContext] 🌐 Sending API request...`);
        const response = await fetch(API_URL, {
          method: 'POST',
          headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
          body: new URLSearchParams(requestParams).toString(),
          signal: abortController.signal,
        });
        console.log(`${platformPrefix} [VoiceContext] 📥 API response received, parsing...`);
        const data = await response.json();
        // Check if session was stopped while waiting for response
        if (sessionStoppedRef.current) {
          console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped during API call, discarding response`);
          return null;
        }
        if (data.ok && data.response?.body) {
          const responseText = data.response.body;
          console.log(`${platformPrefix} [VoiceContext] ✅ API SUCCESS: "${responseText.slice(0, 100)}..."`);
          setLastResponse(responseText);
          // Add Julia's response to transcript for chat display
          addTranscriptEntry('assistant', responseText);
          console.log(`${platformPrefix} [VoiceContext] 🔊 Starting TTS for response...`);
          // Speak the response (will be skipped if session stopped)
          await speak(responseText);
          console.log(`${platformPrefix} [VoiceContext] ✅ TTS completed`);
          return responseText;
        } else {
          // Token might be expired - retry with new token
          if (data.status === '401 Unauthorized') {
            console.log(`${platformPrefix} [VoiceContext] ⚠️ 401 Unauthorized - Token expired, retrying...`);
            apiTokenRef.current = null;
            // Get new token and retry request
            console.log(`${platformPrefix} [VoiceContext] 🔑 Getting new token for retry...`);
            const newToken = await getWellNuoToken();
            const retryRequestParams: Record<string, string> = {
              function: voiceApiType,
              clientId: 'MA_001',
              user_name: WELLNUO_USER,
              token: newToken,
              question: normalizedQuestion,
              deployment_id: deploymentId,
            };
            const retryResponse = await fetch(API_URL, {
              method: 'POST',
              headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
              body: new URLSearchParams(retryRequestParams).toString(),
              signal: abortController.signal,
            });
            const retryData = await retryResponse.json();
            if (retryData.ok && retryData.response?.body) {
              const responseText = retryData.response.body;
              console.log(`${platformPrefix} [VoiceContext] ✅ Retry SUCCEEDED: "${responseText.slice(0, 100)}..."`);
              setLastResponse(responseText);
              addTranscriptEntry('assistant', responseText);
              await speak(responseText);
              return responseText;
            } else {
              console.error(`${platformPrefix} [VoiceContext] ❌ Retry FAILED:`, retryData.message);
              throw new Error(retryData.message || 'Could not get response after retry');
            }
          }
          console.error(`${platformPrefix} [VoiceContext] ❌ API error:`, data.message || data.status);
          throw new Error(data.message || 'Could not get response');
        }
      } catch (err) {
        // Ignore abort errors
        if (err instanceof Error && err.name === 'AbortError') {
          console.log(`${platformPrefix} [VoiceContext] ⚠️ API request aborted`);
          return null;
        }
        // Handle API errors gracefully with voice feedback
        const errorMsg = err instanceof Error ? err.message : 'Unknown error';
        console.error(`${platformPrefix} [VoiceContext] ❌ API ERROR:`, errorMsg);
        // Create user-friendly error message for TTS
        const spokenError = `Sorry, I encountered an error: ${errorMsg}. Please try again.`;
        // Add error to transcript for chat display
        addTranscriptEntry('assistant', spokenError);
        // Speak the error message
        await speak(spokenError);
        // Don't set status to idle - return to listening after speaking error
        // This keeps the voice session active
        setError(errorMsg);
        return null;
      }
    },
    // `speak` is intentionally omitted from the deps: it is declared below
    // with an empty dependency list, so its identity is stable across renders.
    [getWellNuoToken, addTranscriptEntry, voiceApiType]
  );
  /**
   * Interrupt TTS when user starts speaking
   * Call this from the STT hook when voice activity is detected
   */
  const interruptIfSpeaking = useCallback(() => {
    const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
    if (isSpeaking) {
      console.log(`${platformPrefix} [VoiceContext] ⚠️ User INTERRUPTED - stopping TTS`);
      Speech.stop();
      setIsSpeaking(false);
      setStatus('listening');
      console.log(`${platformPrefix} [VoiceContext] → TTS stopped, status=listening`);
      return true;
    } else {
      console.log(`${platformPrefix} [VoiceContext] interruptIfSpeaking called but NOT speaking`);
      return false;
    }
  }, [isSpeaking]);
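  // Hypothetical wiring from the STT side (the event name and hook shown here
  // are illustrative, not part of this file):
  //
  //   const { interruptIfSpeaking } = useVoice();
  //   useSpeechRecognitionEvent('start', () => {
  //     // Barge-in: cut TTS off as soon as the user starts talking
  //     interruptIfSpeaking();
  //   });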
  /**
   * Speak text using TTS
   */
  const speak = useCallback(async (text: string): Promise<void> => {
    const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
    if (!text.trim()) {
      console.log(`${platformPrefix} [VoiceContext] Empty text, skipping TTS`);
      return;
    }
    // Don't speak if session was stopped
    if (sessionStoppedRef.current) {
      console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, skipping TTS`);
      return;
    }
    console.log(`${platformPrefix} [VoiceContext] 🔊 Starting TTS: "${text.slice(0, 50)}..."`);
    setStatus('speaking');
    setIsSpeaking(true);
    return new Promise((resolve) => {
      Speech.speak(text, {
        language: 'en-US',
        rate: 1.1, // Faster, more natural (was 0.9)
        pitch: 1.15, // Slightly higher, less robotic (was 1.0)
        // iOS premium voice (Siri-quality, female);
        // Android will use the default high-quality voice
        voice: Platform.OS === 'ios' ? 'com.apple.voice.premium.en-US.Samantha' : undefined,
        onStart: () => {
          console.log(`${platformPrefix} [VoiceContext] ▶️ TTS playback STARTED`);
        },
        onDone: () => {
          console.log(`${platformPrefix} [VoiceContext] ✅ TTS playback COMPLETED`);
          // On iOS: delay turning off the green indicator to match the STT restart delay (300ms)
          // On Android: turn off immediately (audio focus conflict with STT)
          if (Platform.OS === 'ios') {
            console.log('[iOS] [VoiceContext] ⏱️ Delaying isSpeaking=false by 300ms (match STT restart)');
            setTimeout(() => {
              console.log('[iOS] [VoiceContext] → isSpeaking = false (after 300ms delay)');
              setIsSpeaking(false);
            }, 300);
          } else {
            console.log('[Android] [VoiceContext] → isSpeaking = false (immediate - audio focus release)');
            setIsSpeaking(false);
          }
          // Return to listening state after speaking (if session wasn't stopped)
          if (!sessionStoppedRef.current) {
            console.log(`${platformPrefix} [VoiceContext] → status = listening (ready for next input)`);
            setStatus('listening');
          } else {
            console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, NOT returning to listening`);
          }
          resolve();
        },
        onError: (error) => {
          console.error(`${platformPrefix} [VoiceContext] ❌ TTS ERROR:`, error);
          // On error, turn off indicator immediately (no delay)
          setIsSpeaking(false);
          if (!sessionStoppedRef.current) {
            setStatus('listening');
          }
          resolve();
        },
        onStopped: () => {
          console.log(`${platformPrefix} [VoiceContext] ⏹️ TTS STOPPED (interrupted by user)`);
          // When interrupted by user, turn off indicator immediately
          setIsSpeaking(false);
          // Don't set status to listening if session was stopped by user
          if (!sessionStoppedRef.current) {
            console.log(`${platformPrefix} [VoiceContext] → status = listening (after interruption)`);
            setStatus('listening');
          } else {
            console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, NOT returning to listening`);
          }
          resolve();
        },
      });
    });
  }, []);
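  // Design note: wrapping the callback-based Speech.speak in a Promise lets
  // sendTranscript simply `await speak(...)`, so the session only returns to
  // 'listening' after playback has finished, errored, or been interrupted.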
  /**
   * Stop TTS playback
   */
  const stopSpeaking = useCallback(() => {
    Speech.stop();
    setIsSpeaking(false);
  }, []);
  /**
   * Start voice session
   */
  const startSession = useCallback(() => {
    const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
    console.log(`${platformPrefix} [VoiceContext] 🎤 STARTING voice session`);
    sessionStoppedRef.current = false;
    setStatus('listening');
    setIsListening(true);
    setError(null);
    setTranscript('');
    setPartialTranscript('');
    console.log(`${platformPrefix} [VoiceContext] → Session initialized, status=listening`);
  }, []);
  /**
   * Stop voice session
   */
  const stopSession = useCallback(() => {
    const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
    console.log(`${platformPrefix} [VoiceContext] 🛑 STOPPING voice session`);
    // Mark session as stopped FIRST to prevent any pending callbacks
    sessionStoppedRef.current = true;
    console.log(`${platformPrefix} [VoiceContext] → sessionStopped flag set to TRUE`);
    // Abort any in-flight API requests
    if (abortControllerRef.current) {
      console.log(`${platformPrefix} [VoiceContext] → Aborting in-flight API request`);
      abortControllerRef.current.abort();
      abortControllerRef.current = null;
    }
    // Stop TTS
    console.log(`${platformPrefix} [VoiceContext] → Stopping TTS`);
    Speech.stop();
    // Reset all state
    console.log(`${platformPrefix} [VoiceContext] → Resetting all state to idle`);
    setStatus('idle');
    setIsListening(false);
    setIsSpeaking(false);
    setError(null);
    console.log(`${platformPrefix} [VoiceContext] ✅ Voice session stopped`);
  }, []);
  // Computed values
  const isActive = status !== 'idle';
  const isProcessing = status === 'processing';
  return (
    <VoiceContext.Provider
      value={{
        status,
        isActive,
        isListening,
        isSpeaking,
        isProcessing,
        transcript,
        partialTranscript,
        lastResponse,
        error,
        startSession,
        stopSession,
        sendTranscript,
        setTranscript,
        setPartialTranscript,
        setStatus,
        setIsListening,
        setIsSpeaking,
        speak,
        stopSpeaking,
        interruptIfSpeaking,
        voiceApiType,
        updateVoiceApiType,
      }}
    >
      {children}
    </VoiceContext.Provider>
  );
}
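/**
 * Access the voice session state and controls from any component below
 * VoiceProvider.
 *
 * Minimal usage sketch (the component and its button wiring are illustrative,
 * not taken from this codebase):
 *
 * @example
 * function MicButton() {
 *   const { isActive, startSession, stopSession } = useVoice();
 *   return (
 *     <Button
 *       title={isActive ? 'Stop' : 'Talk'}
 *       onPress={isActive ? stopSession : startSession}
 *     />
 *   );
 * }
 */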
export function useVoice() {
  const context = useContext(VoiceContext);
  if (!context) {
    throw new Error('useVoice must be used within VoiceProvider');
  }
  return context;
}