Improve TTS voice quality - faster rate, higher pitch, iOS premium voice
Changes to contexts/VoiceContext.tsx:
- Increase rate from 0.9 to 1.1 (faster, more natural)
- Increase pitch from 1.0 to 1.15 (slightly higher, less robotic)
- Add iOS premium voice (Samantha - Siri quality)
- Android continues to use default high-quality voice

This fixes the complaint that the voice sounded "отсталый" (backward/outdated) and "жёсткий" (harsh/stiff) on iOS.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
81a0c59060
commit
f4a239ff43
@ -234,19 +234,21 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
*/
|
*/
|
||||||
const sendTranscript = useCallback(
|
const sendTranscript = useCallback(
|
||||||
async (text: string): Promise<string | null> => {
|
async (text: string): Promise<string | null> => {
|
||||||
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
const trimmedText = text.trim();
|
const trimmedText = text.trim();
|
||||||
|
|
||||||
if (!trimmedText) {
|
if (!trimmedText) {
|
||||||
console.log('[VoiceContext] Empty transcript, skipping API call');
|
console.log(`${platformPrefix} [VoiceContext] Empty transcript, skipping API call`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't send if session was stopped
|
// Don't send if session was stopped
|
||||||
if (sessionStoppedRef.current) {
|
if (sessionStoppedRef.current) {
|
||||||
console.log('[VoiceContext] Session stopped, skipping API call');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, skipping API call`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[VoiceContext] Sending transcript to API (${voiceApiType}):`, trimmedText);
|
console.log(`${platformPrefix} [VoiceContext] 📤 Sending transcript to API (${voiceApiType}): "${trimmedText}"`);
|
||||||
setStatus('processing');
|
setStatus('processing');
|
||||||
setError(null);
|
setError(null);
|
||||||
|
|
||||||
@ -261,23 +263,28 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
abortControllerRef.current = abortController;
|
abortControllerRef.current = abortController;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
|
||||||
// Get API token
|
// Get API token
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🔑 Getting API token...`);
|
||||||
const token = await getWellNuoToken();
|
const token = await getWellNuoToken();
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ✅ Token obtained`);
|
||||||
|
|
||||||
// Check if aborted
|
// Check if aborted
|
||||||
if (abortController.signal.aborted || sessionStoppedRef.current) {
|
if (abortController.signal.aborted || sessionStoppedRef.current) {
|
||||||
console.log('[VoiceContext] Request aborted before API call');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Request aborted before API call`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize question
|
// Normalize question
|
||||||
const normalizedQuestion = normalizeQuestion(trimmedText);
|
const normalizedQuestion = normalizeQuestion(trimmedText);
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 📝 Normalized question: "${normalizedQuestion}"`);
|
||||||
|
|
||||||
// Get deployment ID
|
// Get deployment ID
|
||||||
const deploymentId = deploymentIdRef.current || '21';
|
const deploymentId = deploymentIdRef.current || '21';
|
||||||
|
|
||||||
// Log which API type we're using
|
// Log which API type we're using
|
||||||
console.log('[VoiceContext] Using API type:', voiceApiType);
|
console.log(`${platformPrefix} [VoiceContext] 📡 Using API type: ${voiceApiType}, deployment: ${deploymentId}`);
|
||||||
|
|
||||||
// Build request params
|
// Build request params
|
||||||
const requestParams: Record<string, string> = {
|
const requestParams: Record<string, string> = {
|
||||||
@ -295,6 +302,7 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
// Currently single deployment mode only
|
// Currently single deployment mode only
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🌐 Sending API request...`);
|
||||||
const response = await fetch(API_URL, {
|
const response = await fetch(API_URL, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||||
@ -302,33 +310,37 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
signal: abortController.signal,
|
signal: abortController.signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 📥 API response received, parsing...`);
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
|
|
||||||
// Check if session was stopped while waiting for response
|
// Check if session was stopped while waiting for response
|
||||||
if (sessionStoppedRef.current) {
|
if (sessionStoppedRef.current) {
|
||||||
console.log('[VoiceContext] Session stopped during API call, discarding response');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped during API call, discarding response`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.ok && data.response?.body) {
|
if (data.ok && data.response?.body) {
|
||||||
const responseText = data.response.body;
|
const responseText = data.response.body;
|
||||||
console.log('[VoiceContext] API response:', responseText.slice(0, 100) + '...');
|
console.log(`${platformPrefix} [VoiceContext] ✅ API SUCCESS: "${responseText.slice(0, 100)}..."`);
|
||||||
setLastResponse(responseText);
|
setLastResponse(responseText);
|
||||||
|
|
||||||
// Add Julia's response to transcript for chat display
|
// Add Julia's response to transcript for chat display
|
||||||
addTranscriptEntry('assistant', responseText);
|
addTranscriptEntry('assistant', responseText);
|
||||||
|
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🔊 Starting TTS for response...`);
|
||||||
// Speak the response (will be skipped if session stopped)
|
// Speak the response (will be skipped if session stopped)
|
||||||
await speak(responseText);
|
await speak(responseText);
|
||||||
|
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ✅ TTS completed`);
|
||||||
return responseText;
|
return responseText;
|
||||||
} else {
|
} else {
|
||||||
// Token might be expired - retry with new token
|
// Token might be expired - retry with new token
|
||||||
if (data.status === '401 Unauthorized') {
|
if (data.status === '401 Unauthorized') {
|
||||||
console.log('[VoiceContext] Token expired, retrying with new token...');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ 401 Unauthorized - Token expired, retrying...`);
|
||||||
apiTokenRef.current = null;
|
apiTokenRef.current = null;
|
||||||
|
|
||||||
// Get new token and retry request
|
// Get new token and retry request
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🔑 Getting new token for retry...`);
|
||||||
const newToken = await getWellNuoToken();
|
const newToken = await getWellNuoToken();
|
||||||
|
|
||||||
const retryRequestParams: Record<string, string> = {
|
const retryRequestParams: Record<string, string> = {
|
||||||
@ -351,27 +363,31 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
|
|
||||||
if (retryData.ok && retryData.response?.body) {
|
if (retryData.ok && retryData.response?.body) {
|
||||||
const responseText = retryData.response.body;
|
const responseText = retryData.response.body;
|
||||||
console.log('[VoiceContext] Retry succeeded:', responseText.slice(0, 100) + '...');
|
console.log(`${platformPrefix} [VoiceContext] ✅ Retry SUCCEEDED: "${responseText.slice(0, 100)}..."`);
|
||||||
setLastResponse(responseText);
|
setLastResponse(responseText);
|
||||||
addTranscriptEntry('assistant', responseText);
|
addTranscriptEntry('assistant', responseText);
|
||||||
await speak(responseText);
|
await speak(responseText);
|
||||||
return responseText;
|
return responseText;
|
||||||
} else {
|
} else {
|
||||||
|
console.error(`${platformPrefix} [VoiceContext] ❌ Retry FAILED:`, retryData.message);
|
||||||
throw new Error(retryData.message || 'Could not get response after retry');
|
throw new Error(retryData.message || 'Could not get response after retry');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
console.error(`${platformPrefix} [VoiceContext] ❌ API error:`, data.message || data.status);
|
||||||
throw new Error(data.message || 'Could not get response');
|
throw new Error(data.message || 'Could not get response');
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
|
||||||
// Ignore abort errors
|
// Ignore abort errors
|
||||||
if (err instanceof Error && err.name === 'AbortError') {
|
if (err instanceof Error && err.name === 'AbortError') {
|
||||||
console.log('[VoiceContext] API request aborted');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ API request aborted`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle API errors gracefully with voice feedback
|
// Handle API errors gracefully with voice feedback
|
||||||
const errorMsg = err instanceof Error ? err.message : 'Unknown error';
|
const errorMsg = err instanceof Error ? err.message : 'Unknown error';
|
||||||
console.warn('[VoiceContext] API error:', errorMsg);
|
console.error(`${platformPrefix} [VoiceContext] ❌ API ERROR:`, errorMsg);
|
||||||
|
|
||||||
// Create user-friendly error message for TTS
|
// Create user-friendly error message for TTS
|
||||||
const spokenError = `Sorry, I encountered an error: ${errorMsg}. Please try again.`;
|
const spokenError = `Sorry, I encountered an error: ${errorMsg}. Please try again.`;
|
||||||
@ -397,59 +413,80 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
* Call this from the STT hook when voice activity is detected
|
* Call this from the STT hook when voice activity is detected
|
||||||
*/
|
*/
|
||||||
const interruptIfSpeaking = useCallback(() => {
|
const interruptIfSpeaking = useCallback(() => {
|
||||||
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
|
||||||
if (isSpeaking) {
|
if (isSpeaking) {
|
||||||
console.log('[VoiceContext] User interrupted - stopping TTS');
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ User INTERRUPTED - stopping TTS`);
|
||||||
Speech.stop();
|
Speech.stop();
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
setStatus('listening');
|
setStatus('listening');
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → TTS stopped, status=listening`);
|
||||||
return true;
|
return true;
|
||||||
|
} else {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] interruptIfSpeaking called but NOT speaking`);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}, [isSpeaking]);
|
}, [isSpeaking]);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Speak text using TTS
|
* Speak text using TTS
|
||||||
*/
|
*/
|
||||||
const speak = useCallback(async (text: string): Promise<void> => {
|
const speak = useCallback(async (text: string): Promise<void> => {
|
||||||
if (!text.trim()) return;
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
|
||||||
// Don't speak if session was stopped
|
if (!text.trim()) {
|
||||||
if (sessionStoppedRef.current) {
|
console.log(`${platformPrefix} [VoiceContext] Empty text, skipping TTS`);
|
||||||
console.log('[VoiceContext] Session stopped, skipping TTS');
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[VoiceContext] Speaking:', text.slice(0, 50) + '...');
|
// Don't speak if session was stopped
|
||||||
|
if (sessionStoppedRef.current) {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, skipping TTS`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🔊 Starting TTS: "${text.slice(0, 50)}..."`);
|
||||||
setStatus('speaking');
|
setStatus('speaking');
|
||||||
setIsSpeaking(true);
|
setIsSpeaking(true);
|
||||||
|
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
Speech.speak(text, {
|
Speech.speak(text, {
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
rate: 0.9,
|
rate: 1.1, // Faster, more natural (was 0.9)
|
||||||
pitch: 1.0,
|
pitch: 1.15, // Slightly higher, less robotic (was 1.0)
|
||||||
|
// iOS Premium voice (Siri-quality, female)
|
||||||
|
// Android will use default high-quality voice
|
||||||
|
voice: Platform.OS === 'ios' ? 'com.apple.voice.premium.en-US.Samantha' : undefined,
|
||||||
onStart: () => {
|
onStart: () => {
|
||||||
console.log('[VoiceContext] TTS started');
|
console.log(`${platformPrefix} [VoiceContext] ▶️ TTS playback STARTED`);
|
||||||
},
|
},
|
||||||
onDone: () => {
|
onDone: () => {
|
||||||
console.log('[VoiceContext] TTS completed');
|
console.log(`${platformPrefix} [VoiceContext] ✅ TTS playback COMPLETED`);
|
||||||
|
|
||||||
// On iOS: Delay turning off green indicator to match STT restart delay (300ms)
|
// On iOS: Delay turning off green indicator to match STT restart delay (300ms)
|
||||||
// On Android: Turn off immediately (audio focus conflict with STT)
|
// On Android: Turn off immediately (audio focus conflict with STT)
|
||||||
if (Platform.OS === 'ios') {
|
if (Platform.OS === 'ios') {
|
||||||
|
console.log('[iOS] [VoiceContext] ⏱️ Delaying isSpeaking=false by 300ms (match STT restart)');
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
|
console.log('[iOS] [VoiceContext] → isSpeaking = false (after 300ms delay)');
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
}, 300);
|
}, 300);
|
||||||
} else {
|
} else {
|
||||||
|
console.log('[Android] [VoiceContext] → isSpeaking = false (immediate - audio focus release)');
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return to listening state after speaking (if session wasn't stopped)
|
// Return to listening state after speaking (if session wasn't stopped)
|
||||||
if (!sessionStoppedRef.current) {
|
if (!sessionStoppedRef.current) {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → status = listening (ready for next input)`);
|
||||||
setStatus('listening');
|
setStatus('listening');
|
||||||
|
} else {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, NOT returning to listening`);
|
||||||
}
|
}
|
||||||
resolve();
|
resolve();
|
||||||
},
|
},
|
||||||
onError: (error) => {
|
onError: (error) => {
|
||||||
console.warn('[VoiceContext] TTS error:', error);
|
console.error(`${platformPrefix} [VoiceContext] ❌ TTS ERROR:`, error);
|
||||||
// On error, turn off indicator immediately (no delay)
|
// On error, turn off indicator immediately (no delay)
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
if (!sessionStoppedRef.current) {
|
if (!sessionStoppedRef.current) {
|
||||||
@ -458,12 +495,15 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
resolve();
|
resolve();
|
||||||
},
|
},
|
||||||
onStopped: () => {
|
onStopped: () => {
|
||||||
console.log('[VoiceContext] TTS stopped (interrupted)');
|
console.log(`${platformPrefix} [VoiceContext] ⏹️ TTS STOPPED (interrupted by user)`);
|
||||||
// When interrupted by user, turn off indicator immediately
|
// When interrupted by user, turn off indicator immediately
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
// Don't set status to listening if session was stopped by user
|
// Don't set status to listening if session was stopped by user
|
||||||
if (!sessionStoppedRef.current) {
|
if (!sessionStoppedRef.current) {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → status = listening (after interruption)`);
|
||||||
setStatus('listening');
|
setStatus('listening');
|
||||||
|
} else {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ⚠️ Session stopped, NOT returning to listening`);
|
||||||
}
|
}
|
||||||
resolve();
|
resolve();
|
||||||
},
|
},
|
||||||
@ -483,34 +523,46 @@ export function VoiceProvider({ children }: { children: ReactNode }) {
|
|||||||
* Start voice session
|
* Start voice session
|
||||||
*/
|
*/
|
||||||
const startSession = useCallback(() => {
|
const startSession = useCallback(() => {
|
||||||
console.log('[VoiceContext] Starting voice session');
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🎤 STARTING voice session`);
|
||||||
sessionStoppedRef.current = false;
|
sessionStoppedRef.current = false;
|
||||||
setStatus('listening');
|
setStatus('listening');
|
||||||
setIsListening(true);
|
setIsListening(true);
|
||||||
setError(null);
|
setError(null);
|
||||||
setTranscript('');
|
setTranscript('');
|
||||||
setPartialTranscript('');
|
setPartialTranscript('');
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → Session initialized, status=listening`);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop voice session
|
* Stop voice session
|
||||||
*/
|
*/
|
||||||
const stopSession = useCallback(() => {
|
const stopSession = useCallback(() => {
|
||||||
console.log('[VoiceContext] Stopping voice session');
|
const platformPrefix = Platform.OS === 'ios' ? '[iOS]' : '[Android]';
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] 🛑 STOPPING voice session`);
|
||||||
|
|
||||||
// Mark session as stopped FIRST to prevent any pending callbacks
|
// Mark session as stopped FIRST to prevent any pending callbacks
|
||||||
sessionStoppedRef.current = true;
|
sessionStoppedRef.current = true;
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → sessionStopped flag set to TRUE`);
|
||||||
|
|
||||||
// Abort any in-flight API requests
|
// Abort any in-flight API requests
|
||||||
if (abortControllerRef.current) {
|
if (abortControllerRef.current) {
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → Aborting in-flight API request`);
|
||||||
abortControllerRef.current.abort();
|
abortControllerRef.current.abort();
|
||||||
abortControllerRef.current = null;
|
abortControllerRef.current = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stop TTS
|
// Stop TTS
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → Stopping TTS`);
|
||||||
Speech.stop();
|
Speech.stop();
|
||||||
|
|
||||||
// Reset all state
|
// Reset all state
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] → Resetting all state to idle`);
|
||||||
setStatus('idle');
|
setStatus('idle');
|
||||||
setIsListening(false);
|
setIsListening(false);
|
||||||
setIsSpeaking(false);
|
setIsSpeaking(false);
|
||||||
setError(null);
|
setError(null);
|
||||||
|
console.log(`${platformPrefix} [VoiceContext] ✅ Voice session stopped`);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Computed values
|
// Computed values
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user