web-client.html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Bob Voice Assistant</title>
  <style>
    :root {
      --bg: #0f0f1a;
      --surface: #1a1a2e;
      --text: #c8c8d8;
      --text-dim: #666;
      --idle: #4a6fa5;
      --listening: #4cc9f0;
      --processing: #7b68ee;
      --responding: #2ec4b6;
      --error: #e63946;
      --muted: #444;
      --dormant: #2a2a4a;
      --orb-size: 140px;
    }

    /* Register --state-color so the 0.5s transition declared on .orb can actually
       interpolate it (unregistered custom properties are not animatable). */
    @property --state-color {
      syntax: '<color>';
      inherits: true;
      initial-value: #4a6fa5;
    }

    @media (prefers-reduced-motion: reduce) {
      *, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; }
    }

    * { margin: 0; padding: 0; box-sizing: border-box; }
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
      background: var(--bg); color: var(--text);
      display: flex; flex-direction: column; align-items: center; justify-content: center;
      min-height: 100vh; overflow: hidden;
    }

    /* ── Layout ─────────────────────────────────────────── */
    .app { display: flex; flex-direction: column; align-items: center; width: 100%; max-width: 480px; height: 100vh; padding: 2rem 1.5rem; }
    .header { text-align: center; margin-bottom: 1rem; flex-shrink: 0; }
    .header h1 { font-size: 1.1rem; font-weight: 600; color: var(--idle); letter-spacing: 0.15em; text-transform: uppercase; }
    .header .sub { font-size: 0.75rem; color: var(--text-dim); margin-top: 0.2rem; }

    /* ── Orb ────────────────────────────────────────────── */
    .orb-area { flex: 0 0 auto; display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 2rem 0; }

    .orb-container {
      position: relative; width: var(--orb-size); height: var(--orb-size);
      cursor: pointer; -webkit-tap-highlight-color: transparent;
    }

    .orb {
      width: 100%; height: 100%; border-radius: 50%;
      background: radial-gradient(circle at 40% 35%, color-mix(in srgb, var(--state-color) 60%, white), var(--state-color), color-mix(in srgb, var(--state-color) 70%, black));
      transition: transform 0.15s ease-out, box-shadow 0.3s ease-out, --state-color 0.5s ease-out;
      will-change: transform, box-shadow;
      --state-color: var(--idle);
      box-shadow: 0 0 40px color-mix(in srgb, var(--state-color) 30%, transparent),
                  0 0 80px color-mix(in srgb, var(--state-color) 15%, transparent);
    }

    /* State-specific orb colors via data attribute */
    [data-state="idle"] .orb { --state-color: var(--idle); }
    [data-state="listening"] .orb { --state-color: var(--listening); }
    [data-state="processing"] .orb { --state-color: var(--processing); }
    [data-state="responding"] .orb { --state-color: var(--responding); }
    [data-state="error"] .orb { --state-color: var(--error); }
    [data-state="muted"] .orb { --state-color: var(--muted); transform: scale(0.85); }
    [data-state="dormant"] .orb { --state-color: var(--dormant); transform: scale(0.8); opacity: 0.6; }
    [data-state="wakeup"] .orb { --state-color: var(--listening); animation: wakeup 0.6s ease-out; }

    /* Dormant slow pulse */
    [data-state="dormant"] .orb { animation: dormant-pulse 5s ease-in-out infinite; }
    @keyframes dormant-pulse {
      0%, 100% { transform: scale(0.8); opacity: 0.5; }
      50% { transform: scale(0.84); opacity: 0.65; }
    }

    /* Wake word recognized flash */
    @keyframes wakeup {
      0% { transform: scale(0.8); filter: brightness(1); }
      30% { transform: scale(1.15); filter: brightness(1.8); }
      100% { transform: scale(1); filter: brightness(1); }
    }

    /* Idle breathing */
    [data-state="idle"] .orb { animation: breathe 3.5s ease-in-out infinite; }
    @keyframes breathe {
      0%, 100% { transform: scale(1); }
      50% { transform: scale(1.04); }
    }

    /* Processing shimmer */
    [data-state="processing"] .orb { animation: shimmer 2s ease-in-out infinite; }
    @keyframes shimmer {
      0%, 100% { transform: scale(1); filter: brightness(1); }
      50% { transform: scale(1.03); filter: brightness(1.15); }
    }

    /* Error shake */
    [data-state="error"] .orb { animation: shake 0.4s ease-out; }
    @keyframes shake {
      0%, 100% { transform: translateX(0); }
      15% { transform: translateX(-8px); }
      30% { transform: translateX(8px); }
      45% { transform: translateX(-6px); }
      60% { transform: translateX(6px); }
      75% { transform: translateX(-3px); }
      90% { transform: translateX(3px); }
    }

    /* ── Volume ring (around orb, shows mic level) ────── */
    .volume-ring {
      position: absolute; inset: -6px; border-radius: 50%;
      border: 2px solid transparent; transition: border-color 0.2s, opacity 0.2s;
      opacity: 0; pointer-events: none;
    }
    [data-state="listening"] .volume-ring { border-color: var(--listening); opacity: 0.5; }
    [data-state="responding"] .volume-ring { border-color: var(--responding); opacity: 0.4; }

    /* ── Status text ───────────────────────────────────── */
    .status {
      margin-top: 1.2rem; font-size: 0.85rem; color: var(--text-dim);
      min-height: 1.4em; text-align: center; transition: color 0.3s;
    }
    [data-state="listening"] .status { color: var(--listening); }
    [data-state="processing"] .status { color: var(--processing); }
    [data-state="responding"] .status { color: var(--responding); }
    [data-state="error"] .status { color: var(--error); }

    /* ── Transcript ────────────────────────────────────── */
    .transcript-area {
      flex: 1 1 auto; width: 100%; overflow-y: auto; overflow-x: hidden;
      padding: 0.5rem 0; min-height: 0; /* flex child scrollable */
      mask-image: linear-gradient(transparent, black 8%, black 92%, transparent);
      -webkit-mask-image: linear-gradient(transparent, black 8%, black 92%, transparent);
    }
    .transcript { display: flex; flex-direction: column; gap: 0.6rem; padding: 0.5rem 0; }
    .msg { font-size: 0.82rem; line-height: 1.45; padding: 0.4rem 0; border-bottom: 1px solid #ffffff08; }
    .msg-label { font-weight: 600; font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.08em; margin-bottom: 0.15rem; }
    .msg-user .msg-label { color: var(--listening); }
    .msg-bob .msg-label { color: var(--responding); }
    .msg-error .msg-label { color: var(--error); }
    .msg-text { color: var(--text); }
    .msg-text.partial { color: var(--text-dim); font-style: italic; }
    .msg-text .interrupted { opacity: 0.5; }
    .msg-text .interrupted::after { content: " (interrupted)"; font-style: italic; font-size: 0.75rem; }

    /* ── Controls ──────────────────────────────────────── */
    .controls { flex-shrink: 0; display: flex; gap: 0.8rem; padding: 1rem 0 0.5rem; }
    .ctrl-btn {
      padding: 0.5rem 1.2rem; border-radius: 2rem; border: 1px solid #333;
      background: var(--surface); color: var(--text-dim); font-size: 0.78rem;
      cursor: pointer; transition: all 0.2s; user-select: none;
    }
    .ctrl-btn:hover { border-color: #555; color: var(--text); }
    .ctrl-btn:active { transform: scale(0.96); }
    .ctrl-btn.active { border-color: var(--error); color: var(--error); }
    .ctrl-btn.primary { border-color: var(--idle); color: var(--idle); }
    .ctrl-btn.primary:hover { border-color: var(--listening); color: var(--listening); background: #16213e; }
    .ctrl-btn.danger { border-color: var(--error); color: var(--error); }

    /* ── Face avatar ─────────────────────────────────────── */
    .face-container { display: none; width: 180px; height: 220px; position: relative; cursor: pointer; }
    [data-view="face"] .face-container { display: block; }
    [data-view="face"] .orb-container { display: none; }

    .face-svg { width: 100%; height: 100%; }
    .face-head { transition: fill 0.5s; }
    [data-state="idle"] .face-head { fill: url(#faceGradIdle); }
    [data-state="listening"] .face-head { fill: url(#faceGradListen); }
    [data-state="processing"] .face-head { fill: url(#faceGradProcess); }
    [data-state="responding"] .face-head { fill: url(#faceGradRespond); }
    [data-state="error"] .face-head { fill: url(#faceGradError); }
    [data-state="muted"] .face-head { fill: url(#faceGradMuted); }

    .face-eye { fill: #e0e8f0; transition: ry 0.1s; }
    .face-pupil { fill: #1a1a2e; transition: cx 0.3s, cy 0.3s; }
    .face-brow { fill: none; stroke: #c8d8e8; stroke-width: 2.5; stroke-linecap: round; transition: d 0.3s; }
    .face-mouth-inner { fill: #1a1a2e; opacity: 0; }
    .face-mouth-lip { fill: none; stroke: #e8b4b8; stroke-width: 2; stroke-linecap: round; stroke-linejoin: round; }
    .face-teeth { fill: #e8e8f0; opacity: 0; }

    /* Blink animation */
    @keyframes blink { 0%,96%,100% { ry: 12px; } 97.5% { ry: 1px; } }
    .face-eye { animation: blink 4s ease-in-out infinite; }
    .face-eye:nth-of-type(2) { animation-delay: 0.15s; }

    /* State expressions */
    [data-state="listening"] .face-brow { stroke: var(--listening); }
    [data-state="processing"] .face-brow { stroke: var(--processing); }
    [data-state="responding"] .face-brow { stroke: var(--responding); }
    [data-state="error"] .face-eye { fill: #ffcccc; }

    .view-toggle { font-size: 0.7rem; color: var(--text-dim); cursor: pointer; margin-top: 0.3rem; border: none; background: none; text-decoration: underline; }
    .view-toggle:hover { color: var(--text); }

    /* ── Screen reader only ─────────────────────────────── */
    .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0,0,0,0); white-space: nowrap; border: 0; }

    /* ── Scrollbar ──────────────────────────────────────── */
    .transcript-area::-webkit-scrollbar { width: 4px; }
    .transcript-area::-webkit-scrollbar-track { background: transparent; }
    .transcript-area::-webkit-scrollbar-thumb { background: #333; border-radius: 2px; }
  </style>
</head>
<body>

<div class="app" data-state="idle" id="app">
  <div class="header">
    <h1>Bob</h1>
    <div class="sub">Voice Assistant</div>
  </div>

  <div class="orb-area">
    <div class="orb-container" id="orbContainer" role="button" tabindex="0" aria-label="Activate voice assistant">
      <div class="orb" id="orb"></div>
      <div class="volume-ring" id="volumeRing"></div>
    </div>

    <!-- Face avatar (hidden by default, toggled with view button) -->
    <div class="face-container" id="faceContainer">
      <svg class="face-svg" viewBox="0 0 180 220" xmlns="http://www.w3.org/2000/svg">
        <defs>
          <radialGradient id="faceGradIdle" cx="40%" cy="35%"><stop offset="0%" stop-color="#6a8fc5"/><stop offset="100%" stop-color="#3a5a85"/></radialGradient>
id="faceGradListen" cx="40%" cy="35%"><stop offset="0%" stop-color="#7ce0f8"/><stop offset="100%" stop-color="#2a9ab8"/></radialGradient> 224 <radialGradient id="faceGradProcess" cx="40%" cy="35%"><stop offset="0%" stop-color="#9b88ee"/><stop offset="100%" stop-color="#5b48be"/></radialGradient> 225 <radialGradient id="faceGradRespond" cx="40%" cy="35%"><stop offset="0%" stop-color="#4ee4d6"/><stop offset="100%" stop-color="#1ea496"/></radialGradient> 226 <radialGradient id="faceGradError" cx="40%" cy="35%"><stop offset="0%" stop-color="#f06976"/><stop offset="100%" stop-color="#c02936"/></radialGradient> 227 <radialGradient id="faceGradMuted" cx="40%" cy="35%"><stop offset="0%" stop-color="#666"/><stop offset="100%" stop-color="#333"/></radialGradient> 228 </defs> 229 <!-- Head --> 230 <ellipse class="face-head" cx="90" cy="110" rx="75" ry="90"/> 231 <!-- Eyes --> 232 <ellipse class="face-eye" cx="62" cy="95" rx="10" ry="12"/> 233 <ellipse class="face-eye" cx="118" cy="95" rx="10" ry="12"/> 234 <!-- Pupils --> 235 <circle class="face-pupil" id="pupilL" cx="62" cy="96" r="5"/> 236 <circle class="face-pupil" id="pupilR" cx="118" cy="96" r="5"/> 237 <!-- Eyebrows --> 238 <path class="face-brow" id="browL" d="M 48 78 Q 62 72, 76 78"/> 239 <path class="face-brow" id="browR" d="M 104 78 Q 118 72, 132 78"/> 240 <!-- Mouth --> 241 <path class="face-mouth-inner" id="mouthInner" d=""/> 242 <path class="face-teeth" id="mouthTeeth" d=""/> 243 <path class="face-mouth-lip" id="mouthLip" d="M 70 145 Q 90 148, 110 145"/> 244 </svg> 245 </div> 246 247 <button class="view-toggle" id="viewToggle">Switch to face</button> 248 <div class="status" id="statusText">Tap the orb to start</div> 249 </div> 250 251 <div class="transcript-area" id="transcriptArea"> 252 <div class="transcript" id="transcript"></div> 253 </div> 254 255 <div class="controls"> 256 <button class="ctrl-btn primary" id="connectBtn">Connect</button> 257 <button class="ctrl-btn" id="muteBtn" style="display:none">Mute</button> 258 <button class="ctrl-btn danger" id="disconnectBtn" style="display:none">Disconnect</button> 259 <a href="/enroll.html" class="ctrl-btn" style="text-decoration:none;display:inline-block">Enroll Voice</a> 260 </div> 261 262 <!-- Accessibility: live region for state announcements --> 263 <div aria-live="assertive" aria-atomic="true" class="sr-only" id="a11yAnnounce"></div> 264 </div> 265 266 <script> 267 // ── Configuration ────────────────────────────────────── 268 const WS_URL = location.hostname.endsWith('.genexergy.org') 269 ? 
// ── Configuration ────────────────────────────────────────
const WS_URL = location.hostname.endsWith('.genexergy.org')
  ? `wss://${location.hostname}/ws`
  : `ws://${location.hostname}:10700`;
const SAMPLE_RATE_IN = 16000;   // mic capture rate (matches STT)
const SAMPLE_RATE_OUT = 24000;  // TTS playback rate (matches Kokoro)

// ── State ────────────────────────────────────────────────
let state = 'idle'; // idle | dormant | wakeup | listening | processing | responding | error | muted
let ws = null, audioCtx = null, mediaStream = null, processor = null;
let analyser = null, analyserData = null;
let isConnected = false, isMuted = false;
let micLevel = 0, outputLevel = 0;
let rafId = null;
let audioQueue = [], isPlaying = false;
let errorStrikes = 0;
let silenceTimer = null;
let wakeWordGateActive = false; // true when wake word gate is in use (dormant/wakeup states)

// ── DOM refs ─────────────────────────────────────────────
const app = document.getElementById('app');
const orb = document.getElementById('orb');
const volumeRing = document.getElementById('volumeRing');
const statusText = document.getElementById('statusText');
const transcript = document.getElementById('transcript');
const transcriptArea = document.getElementById('transcriptArea');
const connectBtn = document.getElementById('connectBtn');
const muteBtn = document.getElementById('muteBtn');
const disconnectBtn = document.getElementById('disconnectBtn');
const a11y = document.getElementById('a11yAnnounce');
const orbContainer = document.getElementById('orbContainer');

// ── Earcons (generated tones) ────────────────────────────
const earcons = {
  listen: () => playTone([523, 659], 0.15, 0.08),      // C5→E5 ascending
  wakeup: () => playTone([440, 554, 659], 0.12, 0.1),  // A4→C#5→E5 major triad — wake word recognized
  process: () => playTone([440], 0.05, 0.04),          // A4 click
  error: () => playTone([349, 311], 0.25, 0.1),        // F4→Eb4 descending minor
  complete: () => playTone([392, 523], 0.2, 0.06),     // G4→C5 resolution
  mute: () => playTone([330], 0.08, 0.05),             // E4 click
};

function playTone(freqs, dur, vol) {
  if (!audioCtx) return;
  freqs.forEach((freq, i) => {
    const osc = audioCtx.createOscillator();
    const gain = audioCtx.createGain();
    osc.type = 'sine';
    osc.frequency.value = freq;
    gain.gain.setValueAtTime(0, audioCtx.currentTime + i * dur * 0.6);
    gain.gain.linearRampToValueAtTime(vol, audioCtx.currentTime + i * dur * 0.6 + 0.01);
    gain.gain.exponentialRampToValueAtTime(0.001, audioCtx.currentTime + (i + 1) * dur * 0.6 + dur);
    osc.connect(gain).connect(audioCtx.destination);
    osc.start(audioCtx.currentTime + i * dur * 0.6);
    osc.stop(audioCtx.currentTime + (i + 1) * dur * 0.6 + dur + 0.05);
  });
}
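
// Visual state machine (mirrored onto <div id="app" data-state="...">, which the CSS keys off):
//   idle:       connected and waiting (or not yet connected)
//   dormant:    server-side wake-word gate is waiting for "Hey Bob"
//   wakeup:     wake word recognized; brief flash, then falls through to listening
//   listening:  mic audio is streaming and the orb tracks mic level
//   processing: transcript sent, waiting on the response (15 s watchdog)
//   responding: TTS audio is playing back
//   error:      auto-recovers to idle after 4 s
//   muted:      mic chunks are dropped before they reach the WebSocket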

// ── State Machine ────────────────────────────────────────
function setState(newState, statusMsg) {
  if (state === newState && !statusMsg) return;
  const prev = state;
  state = newState;
  app.dataset.state = newState;

  // Status text
  const messages = {
    idle: isConnected ? 'Ready — speak anytime' : 'Tap the orb to start',
    dormant: 'Say "Hey Bob" to wake me up',
    listening: 'Listening...',
    processing: 'Thinking...',
    responding: 'Speaking...',
    error: statusMsg || 'Something went wrong',
    muted: 'Microphone muted',
  };
  statusText.textContent = statusMsg || messages[newState] || '';

  // A11y announcement
  const announcements = {
    idle: 'Bob is ready',
    dormant: 'Say Hey Bob to activate',
    listening: 'Bob is listening',
    processing: 'Processing your request',
    responding: 'Bob is responding',
    error: statusMsg || 'Error occurred',
    muted: 'Microphone muted',
  };
  a11y.textContent = announcements[newState] || '';

  // Earcons
  if (newState === 'wakeup') earcons.wakeup();
  if (newState === 'listening' && prev !== 'listening' && prev !== 'wakeup') earcons.listen();
  if (newState === 'processing') earcons.process();
  if (newState === 'error') earcons.error();
  if (newState === 'idle' && prev === 'responding') earcons.complete();
  if (newState === 'dormant' && prev === 'responding') earcons.complete();
  if (newState === 'muted' || (prev === 'muted' && newState !== 'muted')) earcons.mute();

  // Auto-recover from error
  if (newState === 'error') {
    setTimeout(() => { if (state === 'error') setState('idle'); }, 4000);
  }

  // Silence watchdog for processing
  clearTimeout(silenceTimer);
  if (newState === 'processing') {
    silenceTimer = setTimeout(() => {
      if (state === 'processing') {
        setState('error', 'This is taking too long. Please try again.');
        errorStrikes++;
      }
    }, 15000);
  }
}

// ── Audio Visualization Loop ─────────────────────────────
function startVisualization() {
  if (rafId) return;
  function tick() {
    rafId = requestAnimationFrame(tick);

    // Mic level from analyser
    if (analyser && analyserData && (state === 'listening' || state === 'idle')) {
      analyser.getByteFrequencyData(analyserData);
      let sum = 0;
      for (let i = 0; i < analyserData.length; i++) sum += analyserData[i];
      const raw = sum / analyserData.length / 255;
      micLevel += (raw - micLevel) * 0.15; // smooth
    } else {
      micLevel *= 0.9; // decay
    }

    // Apply to orb
    let scale = 1, glow = 0.25;
    if (state === 'listening') {
      scale = 1 + micLevel * 0.3;
      glow = 0.3 + micLevel * 0.5;
    } else if (state === 'responding') {
      scale = 1 + outputLevel * 0.2;
      glow = 0.25 + outputLevel * 0.45;
    }

    if (state === 'listening' || state === 'responding') {
      orb.style.transform = `scale(${scale})`;
      orb.style.boxShadow = `0 0 ${40 + glow * 60}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 50)}%, transparent), 0 0 ${80 + glow * 40}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 25)}%, transparent)`;
      volumeRing.style.opacity = (0.3 + micLevel * 0.7).toString();
      volumeRing.style.transform = `scale(${1 + micLevel * 0.08})`;
    } else {
      orb.style.transform = '';
      orb.style.boxShadow = '';
      volumeRing.style.opacity = '';
      volumeRing.style.transform = '';
    }
  }
  tick();
}

function stopVisualization() {
  if (rafId) { cancelAnimationFrame(rafId); rafId = null; }
}

// ── Transcript ───────────────────────────────────────────
let currentUserMsg = null, currentBobMsg = null;

function parseSpeakerTag(text) {
  // Parse "[Cam] tell me a joke" → {speaker: "Cam", text: "tell me a joke"}
  const match = text.match(/^\[([^\]]+)\]\s*(.*)/s);
  if (match) return { speaker: match[1], text: match[2] };
  return { speaker: null, text: text };
}

function addMessage(who, text, isPartial = false) {
  if (who === 'user') {
    // Parse speaker tag from diarization
    const parsed = parseSpeakerTag(text);
    const label = parsed.speaker || 'You';
    text = parsed.text || text;

    if (isPartial) {
      if (!currentUserMsg) {
        currentUserMsg = createMsgElement(label, 'msg-user');
        transcript.appendChild(currentUserMsg);
      }
      currentUserMsg.querySelector('.msg-text').textContent = text;
      currentUserMsg.querySelector('.msg-label').textContent = label;
      currentUserMsg.querySelector('.msg-text').classList.toggle('partial', true);
    } else {
      if (currentUserMsg) {
        currentUserMsg.querySelector('.msg-text').textContent = text;
        currentUserMsg.querySelector('.msg-label').textContent = label;
        currentUserMsg.querySelector('.msg-text').classList.remove('partial');
        currentUserMsg = null;
      } else {
        const el = createMsgElement(label, 'msg-user');
        el.querySelector('.msg-text').textContent = text;
        transcript.appendChild(el);
      }
    }
  } else if (who === 'bob') {
    if (!currentBobMsg) {
      currentBobMsg = createMsgElement('Bob', 'msg-bob');
      transcript.appendChild(currentBobMsg);
    }
    const textEl = currentBobMsg.querySelector('.msg-text');
    textEl.textContent += text;
  } else if (who === 'error') {
    const el = createMsgElement('', 'msg-error');
    el.querySelector('.msg-text').textContent = text;
    transcript.appendChild(el);
  }
  transcriptArea.scrollTop = transcriptArea.scrollHeight;
}

function finalizeBobMsg() {
  currentBobMsg = null;
}

function createMsgElement(label, cls) {
  // Build with DOM APIs (not innerHTML) so a diarized speaker name can't inject markup
  const div = document.createElement('div');
  div.className = `msg ${cls}`;
  const labelEl = document.createElement('div');
  labelEl.className = 'msg-label';
  labelEl.textContent = label;
  const textEl = document.createElement('div');
  textEl.className = 'msg-text';
  div.append(labelEl, textEl);
  return div;
}

// ── Audio Playback (gapless scheduled) ───────────────────
let nextPlayTime = 0;
let playEndTimer = null;

function queueAudio(buffer) {
  if (!audioCtx) return;
  const view = new DataView(buffer);
  const samples = new Float32Array(buffer.byteLength / 2);
  let rms = 0;
  for (let i = 0; i < samples.length; i++) {
    samples[i] = view.getInt16(i * 2, true) / 32768;
    rms += samples[i] * samples[i];
  }
  outputLevel = Math.sqrt(rms / samples.length) * 3;
  isPlaying = true;

  const ab = audioCtx.createBuffer(1, samples.length, SAMPLE_RATE_OUT);
  ab.copyToChannel(samples, 0);
  const src = audioCtx.createBufferSource();
  src.buffer = ab;
  src.connect(audioCtx.destination);

  // Schedule gaplessly: each chunk starts exactly when the previous ends
  const now = audioCtx.currentTime;
  if (nextPlayTime < now) nextPlayTime = now;
  src.start(nextPlayTime);
  nextPlayTime += ab.duration;

  // Set a timer to detect when all audio has finished
  clearTimeout(playEndTimer);
  playEndTimer = setTimeout(() => {
    if (audioCtx && audioCtx.currentTime >= nextPlayTime - 0.05) {
      isPlaying = false;
      outputLevel = 0;
    }
  }, (nextPlayTime - now) * 1000 + 100);
}

function stopPlayback() {
  nextPlayTime = 0;
  isPlaying = false;
  outputLevel = 0;
  clearTimeout(playEndTimer);
}
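
// ── Audio framing over the WebSocket ─────────────────────
// Binary frames in both directions are raw little-endian 16-bit mono PCM:
// upstream chunks come from the 16 kHz AudioWorklet below, and downstream TTS
// chunks are decoded at 24 kHz by queueAudio(). JSON text frames carry the
// control and transcript messages handled in ws.onmessage.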

// ── WebSocket Connection ─────────────────────────────────
async function connect() {
  connectBtn.style.display = 'none';
  setState('idle', 'Connecting...');

  try {
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: { sampleRate: SAMPLE_RATE_IN, channelCount: 1, echoCancellation: true, noiseSuppression: true }
    });

    // Separate contexts: input at 16kHz for STT/wake word, output at 24kHz for TTS playback
    const inputCtx = new AudioContext({ sampleRate: SAMPLE_RATE_IN });
    audioCtx = new AudioContext({ sampleRate: SAMPLE_RATE_OUT });
    const source = inputCtx.createMediaStreamSource(mediaStream);

    // Analyser for visualization (on input context)
    analyser = inputCtx.createAnalyser();
    analyser.fftSize = 256;
    analyserData = new Uint8Array(analyser.frequencyBinCount);
    source.connect(analyser);

    // PCM processor for sending audio (on input context at 16kHz)
    await inputCtx.audioWorklet.addModule(URL.createObjectURL(new Blob([`
      class PCMProcessor extends AudioWorkletProcessor {
        process(inputs) {
          const input = inputs[0][0];
          if (input) {
            const pcm16 = new Int16Array(input.length);
            for (let i = 0; i < input.length; i++) {
              pcm16[i] = Math.max(-32768, Math.min(32767, Math.round(input[i] * 32767)));
            }
            this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
          }
          return true;
        }
      }
      registerProcessor('pcm-processor', PCMProcessor);
    `], { type: 'application/javascript' })));

    processor = new AudioWorkletNode(inputCtx, 'pcm-processor');
    source.connect(processor);

    // WebSocket
    ws = new WebSocket(WS_URL);
    ws.binaryType = 'arraybuffer';

    ws.onopen = () => {
      isConnected = true;
      errorStrikes = 0;
      wakeWordGateActive = false; // Will be set true if server sends wake-word-state
      muteBtn.style.display = '';
      disconnectBtn.style.display = '';
      setState('idle', 'Connected — waiting for wake word status...');
      startVisualization();
    };
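
    // Two message families are handled below: RTVI-style JSON (label: "rtvi-ai",
    // e.g. {"label":"rtvi-ai","type":"wake-word-state","data":{"state":"waiting"}})
    // and bare Pipecat frame names (e.g. {"type":"UserTranscriptionFrame","text":"..."}).
    // Binary frames are treated as TTS audio and queued for playback.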

    ws.onmessage = (event) => {
      if (event.data instanceof ArrayBuffer) {
        if (state !== 'responding') setState('responding');
        queueAudio(event.data);
      } else {
        try {
          const msg = JSON.parse(event.data);
          if (msg.label || msg.type) console.log('WS:', JSON.stringify(msg).slice(0, 150));
          // RTVI protocol messages (label: "rtvi-ai")
          if (msg.label === 'rtvi-ai') {
            const t = msg.type;
            const d = msg.data || {};
            if (t === 'wake-word-state') {
              wakeWordGateActive = true;
              if (d.state === 'waiting') {
                setState('dormant');
              } else if (d.state === 'activated') {
                setState('wakeup');
                setTimeout(() => { if (state === 'wakeup') setState('listening'); }, 700);
              }
            } else if (t === 'user-transcription') {
              addMessage('user', d.text, false);
              setState('processing');
            } else if (t === 'user-started-speaking') {
              // Only transition to listening if wake word gate is not active (or already past dormant)
              if (!wakeWordGateActive || (state !== 'dormant' && state !== 'idle')) {
                stopPlayback();
                setState('listening');
              }
            } else if (t === 'bot-llm-text' && d.text) {
              // Strip think tags from transcript
              let txt = d.text;
              if (txt.includes('<think>')) { window._inThink = true; txt = txt.replace(/<think>[\s\S]*/,''); }
              if (txt.includes('</think>')) { window._inThink = false; txt = txt.replace(/[\s\S]*<\/think>/,''); }
              if (window._inThink) txt = '';
              txt = txt.replace(/<think>[\s\S]*?<\/think>/g, '');
              if (txt) addMessage('bob', txt);
            } else if (t === 'bot-llm-started') {
              setState('processing');
            } else if (t === 'bot-tts-started') {
              setState('responding');
            } else if (t === 'bot-tts-stopped') {
              finalizeBobMsg();
              setState('listening');
            }
          }
          // Direct Pipecat frame messages (no label)
          else if (msg.type === 'UserTranscriptionFrame' || msg.type === 'transcript') {
            addMessage('user', msg.text, false);
            setState('processing');
          } else if (msg.type === 'InterimTranscriptionFrame') {
            addMessage('user', msg.text, true);
          } else if (msg.type === 'TextFrame' || msg.type === 'bot_transcript') {
            addMessage('bob', msg.text);
          } else if (msg.type === 'BotStartedSpeakingFrame') {
            setState('responding');
          } else if (msg.type === 'BotStoppedSpeakingFrame') {
            finalizeBobMsg();
            setState('listening');
          } else if (msg.type === 'UserStartedSpeakingFrame') {
            audioQueue = []; isPlaying = false;
            setState('listening');
          }
        } catch(e) {}
      }
    };

    ws.onclose = () => { disconnect(); };
    ws.onerror = () => {
      setState('error', 'Connection lost');
      disconnect();
    };

    processor.port.onmessage = (e) => {
      if (ws && ws.readyState === WebSocket.OPEN && !isMuted) {
        ws.send(e.data);
      }
    };

  } catch(err) {
    setState('error', 'Microphone access denied');
    connectBtn.style.display = '';
  }
}

function disconnect() {
  stopVisualization();
  if (ws) { ws.close(); ws = null; }
  if (processor) { processor.disconnect(); processor = null; }
  if (analyser) { analyser = null; analyserData = null; }
  if (audioCtx) { audioCtx.close().catch(()=>{}); audioCtx = null; }
  if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null; }
  isConnected = false; isMuted = false; wakeWordGateActive = false;
  audioQueue = []; isPlaying = false;
  muteBtn.style.display = 'none';
  disconnectBtn.style.display = 'none';
  connectBtn.style.display = '';
  setState('idle', 'Disconnected. Tap to reconnect.');
}

function toggleMute() {
  isMuted = !isMuted;
  muteBtn.textContent = isMuted ? 'Unmute' : 'Mute';
  muteBtn.classList.toggle('active', isMuted);
  if (isMuted) {
    setState('muted');
  } else {
    setState('listening');
  }
}

// ── Event Listeners ──────────────────────────────────────
connectBtn.addEventListener('click', connect);
disconnectBtn.addEventListener('click', disconnect);
muteBtn.addEventListener('click', toggleMute);

orbContainer.addEventListener('click', () => {
  if (!isConnected) { connect(); return; }
  if (isMuted) { toggleMute(); return; }
  if (state === 'idle') setState('listening');
});

// Keyboard shortcuts
document.addEventListener('keydown', (e) => {
  if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
  if (e.code === 'Space') { e.preventDefault(); if (!isConnected) connect(); else if (state === 'idle') setState('listening'); }
  if (e.code === 'Escape') { if (isConnected) disconnect(); }
  if (e.code === 'KeyM') { if (isConnected) toggleMute(); }
});

// ── Face Avatar ──────────────────────────────────────────
const viewToggle = document.getElementById('viewToggle');
const faceContainer = document.getElementById('faceContainer');
const mouthLip = document.getElementById('mouthLip');
const mouthInner = document.getElementById('mouthInner');
const mouthTeeth = document.getElementById('mouthTeeth');
const browL = document.getElementById('browL');
const browR = document.getElementById('browR');
const pupilL = document.getElementById('pupilL');
const pupilR = document.getElementById('pupilR');
let currentView = 'orb';

viewToggle.addEventListener('click', () => {
  currentView = currentView === 'orb' ? 'face' : 'orb';
  app.dataset.view = currentView;
  viewToggle.textContent = currentView === 'orb' ? 'Switch to face' : 'Switch to orb';
});

faceContainer.addEventListener('click', () => {
  if (!isConnected) { connect(); return; }
  if (isMuted) { toggleMute(); return; }
});

// Pupil tracking (follows mouse)
document.addEventListener('mousemove', (e) => {
  if (currentView !== 'face') return;
  const rect = faceContainer.getBoundingClientRect();
  const cx = rect.left + rect.width / 2;
  const cy = rect.top + rect.height * 0.43;
  const dx = (e.clientX - cx) / window.innerWidth * 4;
  const dy = (e.clientY - cy) / window.innerHeight * 3;
  const clamp = (v, min, max) => Math.max(min, Math.min(max, v));
  pupilL.setAttribute('cx', 62 + clamp(dx, -3, 3));
  pupilL.setAttribute('cy', 96 + clamp(dy, -2, 2));
  pupilR.setAttribute('cx', 118 + clamp(dx, -3, 3));
  pupilR.setAttribute('cy', 96 + clamp(dy, -2, 2));
});

// Mouth shape driven by output audio level
let mouthOpen = 0, mouthWidth = 0.5, mouthRound = 0;
// Output audio analyser (connect during playback)
let outAnalyser = null, outAnalyserData = null;

function initOutputAnalyser() {
  if (outAnalyser || !audioCtx) return;
  outAnalyser = audioCtx.createAnalyser();
  outAnalyser.fftSize = 256;
  outAnalyserData = new Uint8Array(outAnalyser.frequencyBinCount);
}

// Override queueAudio to route through analyser for face
const _origQueueAudio = queueAudio;
queueAudio = function(buffer) {
  if (!audioCtx) return;
  initOutputAnalyser();

  const view = new DataView(buffer);
  const samples = new Float32Array(buffer.byteLength / 2);
  let rms = 0;
  for (let i = 0; i < samples.length; i++) {
    samples[i] = view.getInt16(i * 2, true) / 32768;
    rms += samples[i] * samples[i];
  }
  outputLevel = Math.sqrt(rms / samples.length) * 3;
  isPlaying = true;

  const ab = audioCtx.createBuffer(1, samples.length, SAMPLE_RATE_OUT);
  ab.copyToChannel(samples, 0);
  const src = audioCtx.createBufferSource();
  src.buffer = ab;

  // Route through analyser for face viseme detection
  if (outAnalyser) {
    src.connect(outAnalyser);
    outAnalyser.connect(audioCtx.destination);
  } else {
    src.connect(audioCtx.destination);
  }

  const now = audioCtx.currentTime;
  if (nextPlayTime < now) nextPlayTime = now;
  src.start(nextPlayTime);
  nextPlayTime += ab.duration;

  clearTimeout(playEndTimer);
  playEndTimer = setTimeout(() => {
    if (audioCtx && audioCtx.currentTime >= nextPlayTime - 0.05) {
      isPlaying = false;
      outputLevel = 0;
    }
  }, (nextPlayTime - now) * 1000 + 100);
};

// Face animation in the visualization loop
const origTick = startVisualization;
startVisualization = function() {
  if (rafId) return;
  function tick() {
    rafId = requestAnimationFrame(tick);

    // Mic level
    if (analyser && analyserData && (state === 'listening' || state === 'idle')) {
      analyser.getByteFrequencyData(analyserData);
      let sum = 0;
      for (let i = 0; i < analyserData.length; i++) sum += analyserData[i];
      const raw = sum / analyserData.length / 255;
      micLevel += (raw - micLevel) * 0.15;
    } else {
      micLevel *= 0.9;
    }

    // Orb animation (same as before)
    let scale = 1, glow = 0.25;
    if (state === 'listening') {
      scale = 1 + micLevel * 0.3;
      glow = 0.3 + micLevel * 0.5;
    } else if (state === 'responding') {
      scale = 1 + outputLevel * 0.2;
      glow = 0.25 + outputLevel * 0.45;
    }
    if (state === 'listening' || state === 'responding') {
      orb.style.transform = `scale(${scale})`;
      orb.style.boxShadow = `0 0 ${40 + glow * 60}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 50)}%, transparent), 0 0 ${80 + glow * 40}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 25)}%, transparent)`;
      volumeRing.style.opacity = (0.3 + micLevel * 0.7).toString();
      volumeRing.style.transform = `scale(${1 + micLevel * 0.08})`;
    } else {
      orb.style.transform = '';
      orb.style.boxShadow = '';
      volumeRing.style.opacity = '';
      volumeRing.style.transform = '';
    }

    // Face animation
    if (currentView === 'face') {
      // Viseme from output audio
      if (outAnalyser && outAnalyserData && state === 'responding') {
        outAnalyser.getByteFrequencyData(outAnalyserData);
        const bins = outAnalyserData.length;
        // Low band (vowels O/U)
        let low = 0; for (let i = 0; i < bins * 0.15; i++) low += outAnalyserData[i];
        low = low / (bins * 0.15) / 255;
        // Mid band (vowels A/E)
        let mid = 0; for (let i = Math.floor(bins*0.15); i < bins * 0.4; i++) mid += outAnalyserData[i];
        mid = mid / (bins * 0.25) / 255;
        // High band (consonants)
        let high = 0; for (let i = Math.floor(bins*0.4); i < bins * 0.7; i++) high += outAnalyserData[i];
        high = high / (bins * 0.3) / 255;

        const targetOpen = Math.min(1, (low * 0.7 + mid * 1.0 + high * 0.3) * 2.5);
        const targetWidth = 0.3 + mid * 0.5 + high * 0.3;
        const targetRound = low * 0.6;
        mouthOpen += (targetOpen - mouthOpen) * 0.25;
        mouthWidth += (targetWidth - mouthWidth) * 0.2;
        mouthRound += (targetRound - mouthRound) * 0.2;
      } else {
        // Return to rest
        mouthOpen += (0 - mouthOpen) * 0.15;
        mouthWidth += (0.5 - mouthWidth) * 0.1;
        mouthRound += (0 - mouthRound) * 0.1;
      }
      renderMouth(mouthOpen, mouthWidth, mouthRound);
      renderBrows();
    }
  }
  tick();
};

function renderMouth(open, width, round) {
  const cx = 90, cy = 145;
  const hw = 18 + 14 * width * (1 - round * 0.4);
  const hh = Math.max(0.5, 22 * open);
  const cpx = hw * (0.6 + round * 0.3);
  const cpy = hh * (0.8 + open * 0.2);

  const lip = `M ${cx-hw} ${cy} C ${cx-cpx} ${cy-cpy}, ${cx+cpx} ${cy-cpy}, ${cx+hw} ${cy} M ${cx-hw} ${cy} C ${cx-cpx} ${cy+cpy}, ${cx+cpx} ${cy+cpy}, ${cx+hw} ${cy}`;
  mouthLip.setAttribute('d', lip);

  if (open > 0.05) {
    const s = 0.8;
    const inner = `M ${cx-hw*s} ${cy} C ${cx-cpx*s} ${cy-cpy*s}, ${cx+cpx*s} ${cy-cpy*s}, ${cx+hw*s} ${cy} C ${cx+cpx*s} ${cy+cpy*s}, ${cx-cpx*s} ${cy+cpy*s}, ${cx-hw*s} ${cy} Z`;
    mouthInner.setAttribute('d', inner);
    mouthInner.setAttribute('opacity', Math.min(1, open * 3));
    if (open > 0.15) {
      const tw = hw * 0.6, th = Math.min(hh * 0.25, 4);
      const ty = cy - hh * 0.25;
      mouthTeeth.setAttribute('d', `M ${cx-tw} ${ty} Q ${cx} ${ty+th}, ${cx+tw} ${ty} L ${cx+tw} ${ty+th*0.5} Q ${cx} ${ty+th*1.2}, ${cx-tw} ${ty+th*0.5} Z`);
      mouthTeeth.setAttribute('opacity', Math.min(0.8, open * 1.5));
    } else {
      mouthTeeth.setAttribute('opacity', '0');
    }
  } else {
    mouthInner.setAttribute('opacity', '0');
    mouthTeeth.setAttribute('opacity', '0');
  }
}

function renderBrows() {
  const raised = state === 'listening' ? 4 : state === 'error' ? -3 : 0;
  const furrow = state === 'processing' ? 3 : 0;
  browL.setAttribute('d', `M 48 ${78-raised+furrow} Q 62 ${72-raised}, 76 ${78-raised-furrow}`);
  browR.setAttribute('d', `M 104 ${78-raised-furrow} Q 118 ${72-raised}, 132 ${78-raised+furrow}`);
}
</script>
</body>
</html>