// use-voice-conversation.ts
1 'use client' 2 3 import { useCallback, useEffect, useRef, useState } from 'react' 4 import { useContinuousSpeech } from './use-continuous-speech' 5 import { SentenceAccumulator, AudioChunkQueue, fetchStreamTts } from '@/lib/tts-stream' 6 import { useChatStore } from '@/stores/use-chat-store' 7 8 export type VoiceConversationState = 'idle' | 'listening' | 'processing' | 'speaking' 9 10 /** Max time to wait in 'processing' before falling back to listening (30s). */ 11 const PROCESSING_TIMEOUT_MS = 30_000 12 13 export function useVoiceConversation() { 14 const [voiceState, setVoiceState] = useState<VoiceConversationState>('idle') 15 const accumulatorRef = useRef<SentenceAccumulator | null>(null) 16 const queueRef = useRef<AudioChunkQueue | null>(null) 17 const activeRef = useRef(false) 18 const processingTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null) 19 const [resumeNeeded, setResumeNeeded] = useState(0) 20 const sendMessage = useChatStore((s) => s.sendMessage) 21 22 const clearProcessingTimer = () => { 23 if (processingTimerRef.current) { 24 clearTimeout(processingTimerRef.current) 25 processingTimerRef.current = null 26 } 27 } 28 29 const speech = useContinuousSpeech({ 30 onUtterance: useCallback((text: string) => { 31 setVoiceState('processing') 32 sendMessage(text) 33 // Safety net: if no stream events arrive within timeout, resume listening 34 clearProcessingTimer() 35 processingTimerRef.current = setTimeout(() => { 36 if (activeRef.current) { 37 setVoiceState('listening') 38 setResumeNeeded((n) => n + 1) 39 } 40 }, PROCESSING_TIMEOUT_MS) 41 }, [sendMessage]), 42 }) 43 44 // When resumeNeeded increments, call speech.resume 45 useEffect(() => { 46 if (resumeNeeded > 0) speech.resume() 47 // eslint-disable-next-line react-hooks/exhaustive-deps 48 }, [resumeNeeded]) 49 50 // Called by the chat store's onStreamEvent callback 51 const handleStreamEvent = useCallback((event: { t: string; text?: string }) => { 52 if (!activeRef.current) return 53 54 if 
(event.t === 'd' && event.text) { 55 clearProcessingTimer() 56 setVoiceState('speaking') 57 if (!accumulatorRef.current) { 58 const queue = new AudioChunkQueue() 59 queueRef.current = queue 60 queue.onComplete = () => { 61 // Resume listening after TTS playback finishes 62 if (activeRef.current) { 63 setVoiceState('listening') 64 speech.resume() 65 } 66 } 67 accumulatorRef.current = new SentenceAccumulator((sentence) => { 68 queue.enqueue(fetchStreamTts(sentence)) 69 }) 70 } 71 accumulatorRef.current.push(event.text) 72 } else if (event.t === 'done') { 73 clearProcessingTimer() 74 // Flush remaining text to TTS 75 if (accumulatorRef.current) { 76 accumulatorRef.current.flush() 77 accumulatorRef.current = null 78 } else { 79 // No text was streamed (empty response or error) — resume listening 80 if (activeRef.current) { 81 setVoiceState('listening') 82 speech.resume() 83 } 84 } 85 } else if (event.t === 'err') { 86 // Error from the LLM — resume listening instead of staying stuck 87 clearProcessingTimer() 88 if (activeRef.current) { 89 setVoiceState('listening') 90 speech.resume() 91 } 92 } 93 }, [speech]) 94 95 const start = useCallback(() => { 96 activeRef.current = true 97 setVoiceState('listening') 98 // Register the stream event handler on the chat store 99 useChatStore.setState({ onStreamEvent: handleStreamEvent, voiceConversationActive: true }) 100 speech.start() 101 }, [speech, handleStreamEvent]) 102 103 const stop = useCallback(() => { 104 activeRef.current = false 105 setVoiceState('idle') 106 clearProcessingTimer() 107 speech.stop() 108 queueRef.current?.stop() 109 queueRef.current = null 110 accumulatorRef.current = null 111 useChatStore.setState({ onStreamEvent: null, voiceConversationActive: false }) 112 }, [speech]) 113 114 return { 115 active: activeRef.current || voiceState !== 'idle', 116 state: voiceState, 117 interimText: speech.interimText, 118 transcript: speech.transcript, 119 supported: speech.supported, 120 start, 121 stop, 122 } 123 }