From d6353c85336878f1257b79d2777fd2f70fd0293f Mon Sep 17 00:00:00 2001
From: Sergei
Date: Wed, 28 Jan 2026 19:45:40 -0800
Subject: [PATCH] 2026-01-29: Stable version with voice debug and iOS STT fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added:
- Voice Debug tab - real-time STT/API/TTS/Timer logs
- iOS STT fix - sends the last partial as final on onEnd
- iOS auto-stop - automatically stops STT after 2s of silence
- Voice API selector in Profile (voice_ask / ask_wellnuo_ai)

Fixed:
- iOS never sent isFinal:true - it is now sent via onEnd
- STT did not stop after silence - an auto-stop timer was added
- Profile Voice API selector restored after the rollback

Known issues:
- TypeScript errors (setTimeout type) - not critical
- updateVoiceApiType is missing from VoiceContext - needs to be added

Stable version for testing on iPhone.
---
 REVIEW_REPORT.md              |   1 +
 app/(tabs)/_layout.tsx        |  50 ++++
 app/(tabs)/profile.tsx        | 132 +++++++++
 app/(tabs)/voice-debug.tsx    | 504 ++++++++++++++++++++++++++++++++++
 hooks/useSpeechRecognition.ts |  14 +
 services/api.ts               |  14 +
 6 files changed, 715 insertions(+)
 create mode 100644 REVIEW_REPORT.md
 create mode 100644 app/(tabs)/voice-debug.tsx

diff --git a/REVIEW_REPORT.md b/REVIEW_REPORT.md
new file mode 100644
index 0000000..351fda5
--- /dev/null
+++ b/REVIEW_REPORT.md
@@ -0,0 +1 @@
+Limit reached · resets 1pm (America/Los_Angeles) · turn on /extra-usage
diff --git a/app/(tabs)/_layout.tsx b/app/(tabs)/_layout.tsx
index 1659558..52902d8 100644
--- a/app/(tabs)/_layout.tsx
+++ b/app/(tabs)/_layout.tsx
@@ -29,6 +29,7 @@ export default function TabLayout() {
     interruptIfSpeaking,
     setTranscript,
     setPartialTranscript,
+    partialTranscript, // for iOS auto-stop timer
     sendTranscript,
   } = useVoice();
 
@@ -97,6 +98,45 @@ export default function TabLayout() {
   // Ref to prevent concurrent startListening calls
   const sttStartingRef = useRef(false);
 
+  // Ref to track last partial transcript for iOS auto-stop
+  const lastPartialTextRef = useRef('');
+  const silenceTimerRef = useRef(null);
+
+  // iOS AUTO-STOP: Stop STT after 2 seconds of silence (no new partial transcripts)
+  // This triggers onEnd → iOS fix sends lastPartial as final
+  useEffect(() => {
+    // Clear existing timer
+    if (silenceTimerRef.current) {
+      clearTimeout(silenceTimerRef.current);
+      silenceTimerRef.current = null;
+    }
+
+    // Only track silence when STT is listening (not during processing/speaking)
+    if (sttIsListening && status !== 'processing' && status !== 'speaking') {
+      // Get current partial from VoiceContext (set by handleSpeechResult)
+      const currentPartial = partialTranscript;
+
+      // If partial changed, update ref and set new 2s timer
+      if (currentPartial !== lastPartialTextRef.current) {
+        lastPartialTextRef.current = currentPartial;
+
+        // Start 2-second silence timer
+        silenceTimerRef.current = setTimeout(() => {
+          if (sttIsListening && sessionActiveRef.current) {
+            console.log('[TabLayout] 🍎 iOS AUTO-STOP: 2s silence - stopping STT to trigger onEnd → iOS fix');
+            stopListening();
+          }
+        }, 2000);
+      }
+    }
+
+    return () => {
+      if (silenceTimerRef.current) {
+        clearTimeout(silenceTimerRef.current);
+        silenceTimerRef.current = null;
+      }
+    };
+  }, [sttIsListening, status, partialTranscript, stopListening]);
 
   // Safe wrapper to start STT with debounce protection
   const safeStartSTT = useCallback(() => {
@@ -300,6 +340,16 @@ export default function TabLayout() {
           ),
         }}
       />
+      {/* Voice Debug - visible tab */}
+          (
+
+          ),
+        }}
+      
/> (''); const [deploymentName, setDeploymentName] = useState(''); const [showDeploymentModal, setShowDeploymentModal] = useState(false); @@ -62,6 +64,11 @@ export default function ProfileScreen() { const [isValidating, setIsValidating] = useState(false); const [validationError, setValidationError] = useState(null); + // Voice API Type state + const [voiceApiType, setVoiceApiType] = useState<'voice_ask' | 'ask_wellnuo_ai'>('ask_wellnuo_ai'); + const [showVoiceApiModal, setShowVoiceApiModal] = useState(false); + const [tempVoiceApiType, setTempVoiceApiType] = useState<'voice_ask' | 'ask_wellnuo_ai'>('ask_wellnuo_ai'); + // Load saved deployment ID or auto-populate from first available useEffect(() => { const loadDeploymentId = async () => { @@ -88,12 +95,26 @@ export default function ProfileScreen() { loadDeploymentId(); }, []); + // Load saved Voice API type + useEffect(() => { + const loadVoiceApiType = async () => { + const saved = await api.getVoiceApiType(); + setVoiceApiType(saved); + }; + loadVoiceApiType(); + }, []); + const openDeploymentModal = useCallback(() => { setTempDeploymentId(deploymentId); setValidationError(null); setShowDeploymentModal(true); }, [deploymentId]); + const openVoiceApiModal = useCallback(() => { + setTempVoiceApiType(voiceApiType); + setShowVoiceApiModal(true); + }, [voiceApiType]); + const saveDeploymentId = useCallback(async () => { const trimmed = tempDeploymentId.trim(); setValidationError(null); @@ -128,6 +149,13 @@ export default function ProfileScreen() { } }, [tempDeploymentId]); + const saveVoiceApiType = useCallback(async () => { + await api.setVoiceApiType(tempVoiceApiType); + setVoiceApiType(tempVoiceApiType); + updateVoiceApiType(tempVoiceApiType); + setShowVoiceApiModal(false); + }, [tempVoiceApiType, updateVoiceApiType]); + const openTerms = () => { router.push('/terms'); }; @@ -185,6 +213,15 @@ export default function ProfileScreen() { subtitle={deploymentId ? (deploymentName || `ID: ${deploymentId}`) : 'Auto'} onPress={openDeploymentModal} /> + + @@ -271,6 +308,65 @@ export default function ProfileScreen() { + + {/* Voice API Modal */} + setShowVoiceApiModal(false)} + > + + + Voice API + + Choose which API function to use for voice requests. 
+ + + {/* Radio buttons */} + setTempVoiceApiType('ask_wellnuo_ai')} + > + + {tempVoiceApiType === 'ask_wellnuo_ai' && } + + + ask_wellnuo_ai + LLaMA with WellNuo data + + + + setTempVoiceApiType('voice_ask')} + > + + {tempVoiceApiType === 'voice_ask' && } + + + voice_ask + Alternative voice API + + + + + setShowVoiceApiModal(false)} + > + Cancel + + + Save + + + + + ); } @@ -472,4 +568,40 @@ const styles = StyleSheet.create({ disabledText: { opacity: 0.5, }, + // Radio button styles + radioOption: { + flexDirection: 'row', + alignItems: 'center', + paddingVertical: Spacing.sm + 4, + marginBottom: Spacing.xs, + }, + radioCircle: { + width: 24, + height: 24, + borderRadius: 12, + borderWidth: 2, + borderColor: AppColors.primary, + alignItems: 'center', + justifyContent: 'center', + marginRight: Spacing.md, + }, + radioCircleSelected: { + width: 12, + height: 12, + borderRadius: 6, + backgroundColor: AppColors.primary, + }, + radioTextContainer: { + flex: 1, + }, + radioLabel: { + fontSize: FontSizes.base, + fontWeight: '500', + color: AppColors.textPrimary, + marginBottom: 2, + }, + radioDescription: { + fontSize: FontSizes.xs, + color: AppColors.textSecondary, + }, }); diff --git a/app/(tabs)/voice-debug.tsx b/app/(tabs)/voice-debug.tsx new file mode 100644 index 0000000..47a06f6 --- /dev/null +++ b/app/(tabs)/voice-debug.tsx @@ -0,0 +1,504 @@ +/** + * Voice Debug Screen + * + * Real-time debugging interface for voice recognition pipeline. + * Shows all events, timers, API calls, and state changes. + */ + +import React, { useState, useEffect, useRef, useCallback } from 'react'; +import { + View, + Text, + ScrollView, + StyleSheet, + TouchableOpacity, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { Feather } from '@expo/vector-icons'; + +import { useVoice } from '@/contexts/VoiceContext'; +import { useSpeechRecognition } from '@/hooks/useSpeechRecognition'; +import { AppColors } from '@/constants/theme'; +import { useColorScheme } from '@/hooks/use-color-scheme'; + +interface LogEntry { + id: string; + timestamp: number; + category: 'stt' | 'api' | 'tts' | 'timer' | 'system'; + message: string; + level: 'info' | 'warning' | 'error' | 'success'; + data?: any; +} + +export default function VoiceDebugScreen() { + const colorScheme = useColorScheme(); + const isDark = colorScheme === 'dark'; + const insets = useSafeAreaInsets(); + + const { + isListening, + isSpeaking, + status, + startSession, + stopSession, + } = useVoice(); + + const { + isListening: sttIsListening, + partialTranscript, + recognizedText, + } = useSpeechRecognition({ + lang: 'en-US', + continuous: true, + interimResults: true, + }); + + const [logs, setLogs] = useState([]); + const [silenceTimer, setSilenceTimer] = useState(0); + const scrollViewRef = useRef(null); + const logIdCounter = useRef(0); + const lastPartialRef = useRef(''); + + // Add log entry + const addLog = useCallback(( + category: LogEntry['category'], + message: string, + level: LogEntry['level'] = 'info', + data?: any + ) => { + const entry: LogEntry = { + id: `log-${logIdCounter.current++}`, + timestamp: Date.now(), + category, + message, + level, + data, + }; + + console.log(`[VoiceDebug:${category}]`, message, data || ''); + + setLogs(prev => { + const updated = [...prev, entry]; + // Keep only last 100 logs + return updated.slice(-100); + }); + + setTimeout(() => { + scrollViewRef.current?.scrollToEnd({ animated: true }); + }, 50); + }, []); + + // Clear logs + const clearLogs = useCallback(() => { 
+ setLogs([]); + logIdCounter.current = 0; + addLog('system', 'Logs cleared', 'info'); + }, [addLog]); + + // Monitor voice session state + useEffect(() => { + if (isListening) { + addLog('system', '🎤 Voice session STARTED', 'success'); + } else { + addLog('system', '⏹️ Voice session STOPPED', 'info'); + setSilenceTimer(0); + } + }, [isListening, addLog]); + + // Monitor STT state + useEffect(() => { + if (sttIsListening) { + addLog('stt', '▶️ STT listening started', 'success'); + } else if (isListening) { + addLog('stt', '⏸️ STT stopped (but session active)', 'warning'); + } + }, [sttIsListening, isListening, addLog]); + + // Monitor status changes + useEffect(() => { + if (status === 'processing') { + addLog('api', '⚙️ Processing transcript → sending to API', 'info'); + } else if (status === 'speaking') { + addLog('tts', '🔊 TTS playing (Julia speaking)', 'info'); + } else if (status === 'listening') { + addLog('system', '👂 Ready to listen', 'info'); + } + }, [status, addLog]); + + // Monitor partial transcripts + useEffect(() => { + if (partialTranscript && partialTranscript !== lastPartialRef.current) { + lastPartialRef.current = partialTranscript; + addLog('stt', `📝 Partial: "${partialTranscript.slice(0, 40)}${partialTranscript.length > 40 ? '...' : ''}"`, 'info'); + + // Reset silence timer + setSilenceTimer(0); + addLog('timer', '🔄 Silence timer RESET', 'warning'); + } + }, [partialTranscript, addLog]); + + // Monitor final transcripts + useEffect(() => { + if (recognizedText && recognizedText !== lastPartialRef.current) { + addLog('stt', `✅ FINAL: "${recognizedText.slice(0, 40)}${recognizedText.length > 40 ? '...' : ''}"`, 'success', { + length: recognizedText.length, + transcript: recognizedText + }); + addLog('api', '📤 Sending to API...', 'info'); + } + }, [recognizedText, addLog]); + + // Silence timer (only when STT is listening and not processing/speaking) + useEffect(() => { + let interval: NodeJS.Timeout | null = null; + + if (sttIsListening && status !== 'processing' && status !== 'speaking') { + interval = setInterval(() => { + setSilenceTimer(prev => { + const next = prev + 100; + + // Log milestones + if (next === 1000) { + addLog('timer', '⏱️ Silence: 1.0s', 'info'); + } else if (next === 1500) { + addLog('timer', '⏱️ Silence: 1.5s', 'warning'); + } else if (next === 2000) { + addLog('timer', '🛑 Silence: 2.0s → AUTO-STOP triggered', 'error'); + } + + return next; + }); + }, 100); + } else { + setSilenceTimer(0); + } + + return () => { + if (interval) clearInterval(interval); + }; + }, [sttIsListening, status, addLog]); + + // Get status indicator + const getStatusDisplay = () => { + if (status === 'speaking' || isSpeaking) { + return { color: '#9333EA', icon: '🔊', text: 'Speaking' }; + } + if (status === 'processing') { + return { color: '#F59E0B', icon: '⚙️', text: 'Processing' }; + } + if (isListening && sttIsListening) { + return { color: '#10B981', icon: '🟢', text: 'Listening' }; + } + if (isListening && !sttIsListening) { + return { color: '#F59E0B', icon: '🟡', text: 'Session Active (STT Off)' }; + } + return { color: '#6B7280', icon: '⚪', text: 'Idle' }; + }; + + const statusDisplay = getStatusDisplay(); + const silenceProgress = Math.min(silenceTimer / 2000, 1); + const silenceSeconds = (silenceTimer / 1000).toFixed(1); + + // Log level colors + const getLogColor = (level: LogEntry['level']) => { + switch (level) { + case 'error': return '#EF4444'; + case 'warning': return '#F59E0B'; + case 'success': return '#10B981'; + default: return isDark ? 
'#D1D5DB' : '#374151'; + } + }; + + // Category icons + const getCategoryIcon = (category: LogEntry['category']) => { + switch (category) { + case 'stt': return '🎤'; + case 'api': return '📡'; + case 'tts': return '🔊'; + case 'timer': return '⏱️'; + case 'system': return '⚙️'; + default: return '•'; + } + }; + + return ( + + {/* Header */} + + + Voice Debug + + + + + + + {/* Status Card */} + + + {statusDisplay.icon} + + + Status + + + {statusDisplay.text} + + + + + {/* Silence Timer */} + {sttIsListening && status !== 'processing' && status !== 'speaking' && ( + + + Silence Timer (iOS auto-stop at 2.0s) + + + = 2000 ? '#EF4444' : silenceTimer >= 1500 ? '#F59E0B' : isDark ? '#D1D5DB' : '#374151' + }]}> + {silenceSeconds}s / 2.0s + + + + = 2000 ? '#EF4444' : silenceTimer >= 1500 ? '#F59E0B' : '#10B981' + }]} /> + + + )} + + {/* Current Transcripts */} + {partialTranscript && ( + + + Partial: + + + "{partialTranscript}" + + + )} + {recognizedText && ( + + + Final: + + + "{recognizedText}" + + + )} + + + {/* Logs */} + + + Event Log + + + {logs.length === 0 ? ( + + No events yet. Press FAB to start. + + ) : ( + logs.map(log => { + const time = new Date(log.timestamp); + const timeStr = `${String(time.getHours()).padStart(2, '0')}:${String(time.getMinutes()).padStart(2, '0')}:${String(time.getSeconds()).padStart(2, '0')}.${String(time.getMilliseconds()).padStart(3, '0')}`; + + return ( + + + {timeStr} + + {getCategoryIcon(log.category)} + + {log.message} + + + ); + }) + )} + + + + {/* FAB */} + { + if (isListening) { + addLog('system', '🛑 User stopped session', 'warning'); + stopSession(); + } else { + clearLogs(); + addLog('system', '▶️ User started session', 'success'); + startSession(); + } + }} + > + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + }, + header: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'space-between', + paddingHorizontal: 20, + paddingBottom: 16, + }, + headerTitle: { + fontSize: 28, + fontWeight: '700', + }, + clearButton: { + padding: 8, + }, + statusCard: { + marginHorizontal: 20, + marginBottom: 16, + padding: 16, + borderRadius: 12, + borderLeftWidth: 4, + }, + statusRow: { + flexDirection: 'row', + alignItems: 'center', + }, + statusIcon: { + fontSize: 32, + marginRight: 12, + }, + statusTextContainer: { + flex: 1, + }, + statusLabel: { + fontSize: 12, + fontWeight: '500', + marginBottom: 2, + }, + statusText: { + fontSize: 18, + fontWeight: '700', + }, + timerContainer: { + marginTop: 16, + paddingTop: 16, + borderTopWidth: 1, + borderTopColor: 'rgba(156, 163, 175, 0.2)', + }, + timerLabel: { + fontSize: 12, + fontWeight: '500', + marginBottom: 8, + }, + timerRow: { + marginBottom: 8, + }, + timerText: { + fontSize: 24, + fontWeight: '700', + fontVariant: ['tabular-nums'], + }, + progressBarContainer: { + height: 8, + borderRadius: 4, + overflow: 'hidden', + }, + progressBarFill: { + height: '100%', + borderRadius: 4, + }, + transcriptContainer: { + marginTop: 12, + paddingTop: 12, + borderTopWidth: 1, + borderTopColor: 'rgba(156, 163, 175, 0.2)', + }, + transcriptLabel: { + fontSize: 12, + fontWeight: '500', + marginBottom: 4, + }, + transcriptText: { + fontSize: 14, + fontStyle: 'italic', + }, + logsContainer: { + flex: 1, + marginHorizontal: 20, + }, + logsTitle: { + fontSize: 16, + fontWeight: '700', + marginBottom: 8, + }, + logsScrollView: { + flex: 1, + borderRadius: 8, + }, + logsContent: { + padding: 12, + }, + emptyText: { + textAlign: 'center', + fontSize: 14, + fontStyle: 'italic', + paddingVertical: 
20, + }, + logEntry: { + flexDirection: 'row', + marginBottom: 8, + alignItems: 'flex-start', + }, + logTimestamp: { + fontSize: 11, + fontVariant: ['tabular-nums'], + marginRight: 8, + width: 80, + }, + logIcon: { + fontSize: 14, + marginRight: 6, + }, + logMessage: { + fontSize: 13, + flex: 1, + lineHeight: 18, + }, + fab: { + position: 'absolute', + right: 20, + width: 64, + height: 64, + borderRadius: 32, + alignItems: 'center', + justifyContent: 'center', + shadowColor: '#000', + shadowOffset: { width: 0, height: 4 }, + shadowOpacity: 0.3, + shadowRadius: 8, + elevation: 8, + }, +}); diff --git a/hooks/useSpeechRecognition.ts b/hooks/useSpeechRecognition.ts index 538b4dc..b1f6bf1 100644 --- a/hooks/useSpeechRecognition.ts +++ b/hooks/useSpeechRecognition.ts @@ -104,6 +104,8 @@ export function useSpeechRecognition( const isStartingRef = useRef(false); // Track if voice has been detected in current session (for onVoiceDetected callback) const voiceDetectedRef = useRef(false); + // Track last partial transcript for iOS fix (iOS never sends isFinal:true) + const lastPartialRef = useRef(''); // Check availability on mount useEffect(() => { @@ -140,6 +142,16 @@ export function useSpeechRecognition( // Event: Recognition ended useSpeechRecognitionEvent('end', () => { console.log('[SpeechRecognition] Ended'); + + // iOS FIX: iOS never sends isFinal:true, so we send last partial as final when STT ends + const lastPartial = lastPartialRef.current; + if (lastPartial && lastPartial.trim().length > 0) { + console.log('[SpeechRecognition] 🍎 iOS FIX - Sending last partial as final:', lastPartial); + setRecognizedText(lastPartial); + onResult?.(lastPartial, true); // Send as final=true + lastPartialRef.current = ''; // Clear after sending + } + setIsListening(false); setPartialTranscript(''); isStartingRef.current = false; @@ -167,8 +179,10 @@ export function useSpeechRecognition( if (isFinal) { setRecognizedText(transcript); setPartialTranscript(''); + lastPartialRef.current = ''; // Clear after final } else { setPartialTranscript(transcript); + lastPartialRef.current = transcript; // Save for iOS fix } onResult?.(transcript, isFinal); diff --git a/services/api.ts b/services/api.ts index 101cc8d..4b524cc 100644 --- a/services/api.ts +++ b/services/api.ts @@ -229,6 +229,20 @@ class ApiService { } } + // Voice API Type management + async setVoiceApiType(type: 'voice_ask' | 'ask_wellnuo_ai'): Promise { + await SecureStore.setItemAsync('voiceApiType', type); + } + + async getVoiceApiType(): Promise<'voice_ask' | 'ask_wellnuo_ai'> { + try { + const saved = await SecureStore.getItemAsync('voiceApiType'); + return (saved as 'voice_ask' | 'ask_wellnuo_ai') || 'ask_wellnuo_ai'; + } catch { + return 'ask_wellnuo_ai'; + } + } + async validateDeploymentId(deploymentId: string): Promise> { const token = await this.getToken(); const userName = await this.getUserName();
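
Note on the two known issues called out in the commit message (the setTimeout typing error and the missing updateVoiceApiType in VoiceContext): the sketch below is a minimal, hypothetical illustration, not code from this patch. The '@/services/api' import path and the hook shape are assumptions; only setVoiceApiType/getVoiceApiType, added to services/api.ts above, are real.

// setTimeout typing: ReturnType<typeof setTimeout> compiles for both React Native
// and web targets, avoiding the NodeJS.Timeout vs number mismatch noted above, e.g.
//   const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

// Hypothetical updateVoiceApiType wiring for VoiceContext, wrapping the persistence
// helpers added to services/api.ts in this patch.
import { useCallback, useState } from 'react';
import { api } from '@/services/api'; // assumed export path

type VoiceApiType = 'voice_ask' | 'ask_wellnuo_ai';

export function useVoiceApiType() {
  const [voiceApiType, setVoiceApiTypeState] = useState<VoiceApiType>('ask_wellnuo_ai');

  const updateVoiceApiType = useCallback((type: VoiceApiType) => {
    setVoiceApiTypeState(type);      // keep in-memory state in sync for consumers
    void api.setVoiceApiType(type);  // persist the choice (SecureStore-backed)
  }, []);

  return { voiceApiType, updateVoiceApiType };
}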