/ services / pipecat-agent / web-client.html
web-client.html
  1  <!DOCTYPE html>
  2  <html lang="en">
  3  <head>
  4  <meta charset="UTF-8">
  5  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6  <title>Bob Voice Assistant</title>
  7  <style>
  /* Design tokens. The per-state colors map 1:1 to the data-state values
     that the JS state machine writes onto #app. */
  :root {
    --bg: #0f0f1a;
    --surface: #1a1a2e;
    --text: #c8c8d8;
    --text-dim: #666;
    --idle: #4a6fa5;
    --listening: #4cc9f0;
    --processing: #7b68ee;
    --responding: #2ec4b6;
    --error: #e63946;
    --muted: #444;
    --dormant: #2a2a4a;
    --orb-size: 140px;
  }

  /* Honor the OS reduced-motion preference by collapsing all animation. */
  @media (prefers-reduced-motion: reduce) {
    *, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; }
  }

  * { margin: 0; padding: 0; box-sizing: border-box; }
  body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
    background: var(--bg); color: var(--text);
    display: flex; flex-direction: column; align-items: center; justify-content: center;
    min-height: 100vh; overflow: hidden;
  }

  /* ── Layout ─────────────────────────────────────────── */
  .app { display: flex; flex-direction: column; align-items: center; width: 100%; max-width: 480px; height: 100vh; padding: 2rem 1.5rem; }
  .header { text-align: center; margin-bottom: 1rem; flex-shrink: 0; }
  .header h1 { font-size: 1.1rem; font-weight: 600; color: var(--idle); letter-spacing: 0.15em; text-transform: uppercase; }
  .header .sub { font-size: 0.75rem; color: var(--text-dim); margin-top: 0.2rem; }

  /* ── Orb ────────────────────────────────────────────── */
  .orb-area { flex: 0 0 auto; display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 2rem 0; }

  /* Click/tap target wrapping the orb and its volume ring. */
  .orb-container {
    position: relative; width: var(--orb-size); height: var(--orb-size);
    cursor: pointer; -webkit-tap-highlight-color: transparent;
  }
 48  
 49    .orb {
 50      width: 100%; height: 100%; border-radius: 50%;
 51      background: radial-gradient(circle at 40% 35%, color-mix(in srgb, var(--state-color) 60%, white), var(--state-color), color-mix(in srgb, var(--state-color) 70%, black));
 52      transition: transform 0.15s ease-out, box-shadow 0.3s ease-out, --state-color 0.5s ease-out;
 53      will-change: transform, box-shadow;
 54      --state-color: var(--idle);
 55      box-shadow: 0 0 40px color-mix(in srgb, var(--state-color) 30%, transparent),
 56                  0 0 80px color-mix(in srgb, var(--state-color) 15%, transparent);
 57    }
 58  
 59    /* State-specific orb colors via data attribute */
 60    [data-state="idle"] .orb { --state-color: var(--idle); }
 61    [data-state="listening"] .orb { --state-color: var(--listening); }
 62    [data-state="processing"] .orb { --state-color: var(--processing); }
 63    [data-state="responding"] .orb { --state-color: var(--responding); }
 64    [data-state="error"] .orb { --state-color: var(--error); }
 65    [data-state="muted"] .orb { --state-color: var(--muted); transform: scale(0.85); }
 66    [data-state="dormant"] .orb { --state-color: var(--dormant); transform: scale(0.8); opacity: 0.6; }
 67    [data-state="wakeup"] .orb { --state-color: var(--listening); animation: wakeup 0.6s ease-out; }
 68  
 69    /* Dormant slow pulse */
 70    [data-state="dormant"] .orb { animation: dormant-pulse 5s ease-in-out infinite; }
 71    @keyframes dormant-pulse {
 72      0%, 100% { transform: scale(0.8); opacity: 0.5; }
 73      50% { transform: scale(0.84); opacity: 0.65; }
 74    }
 75  
 76    /* Wake word recognized flash */
 77    @keyframes wakeup {
 78      0% { transform: scale(0.8); filter: brightness(1); }
 79      30% { transform: scale(1.15); filter: brightness(1.8); }
 80      100% { transform: scale(1); filter: brightness(1); }
 81    }
 82  
 83    /* Idle breathing */
 84    [data-state="idle"] .orb { animation: breathe 3.5s ease-in-out infinite; }
 85    @keyframes breathe {
 86      0%, 100% { transform: scale(1); }
 87      50% { transform: scale(1.04); }
 88    }
 89  
 90    /* Processing shimmer */
 91    [data-state="processing"] .orb { animation: shimmer 2s ease-in-out infinite; }
 92    @keyframes shimmer {
 93      0%, 100% { transform: scale(1); filter: brightness(1); }
 94      50% { transform: scale(1.03); filter: brightness(1.15); }
 95    }
 96  
 97    /* Error shake */
 98    [data-state="error"] .orb { animation: shake 0.4s ease-out; }
 99    @keyframes shake {
100      0%, 100% { transform: translateX(0); }
101      15% { transform: translateX(-8px); }
102      30% { transform: translateX(8px); }
103      45% { transform: translateX(-6px); }
104      60% { transform: translateX(6px); }
105      75% { transform: translateX(-3px); }
106      90% { transform: translateX(3px); }
107    }
108  
  /* ── Volume ring (around orb, shows mic level) ────── */
  .volume-ring {
    position: absolute; inset: -6px; border-radius: 50%;
    border: 2px solid transparent; transition: border-color 0.2s, opacity 0.2s;
    opacity: 0; pointer-events: none;
  }
  /* While listening/responding, JS also sets opacity/transform inline
     every animation frame; these rules are the static baseline. */
  [data-state="listening"] .volume-ring { border-color: var(--listening); opacity: 0.5; }
  [data-state="responding"] .volume-ring { border-color: var(--responding); opacity: 0.4; }

  /* ── Status text ───────────────────────────────────── */
  .status {
    margin-top: 1.2rem; font-size: 0.85rem; color: var(--text-dim);
    min-height: 1.4em; text-align: center; transition: color 0.3s;
  }
  [data-state="listening"] .status { color: var(--listening); }
  [data-state="processing"] .status { color: var(--processing); }
  [data-state="responding"] .status { color: var(--responding); }
  [data-state="error"] .status { color: var(--error); }

  /* ── Transcript ────────────────────────────────────── */
  .transcript-area {
    flex: 1 1 auto; width: 100%; overflow-y: auto; overflow-x: hidden;
    padding: 0.5rem 0; min-height: 0; /* flex child scrollable */
    /* Fade messages at the top/bottom edges of the scroll viewport. */
    mask-image: linear-gradient(transparent, black 8%, black 92%, transparent);
    -webkit-mask-image: linear-gradient(transparent, black 8%, black 92%, transparent);
  }
  .transcript { display: flex; flex-direction: column; gap: 0.6rem; padding: 0.5rem 0; }
  .msg { font-size: 0.82rem; line-height: 1.45; padding: 0.4rem 0; border-bottom: 1px solid #ffffff08; }
  .msg-label { font-weight: 600; font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.08em; margin-bottom: 0.15rem; }
  .msg-user .msg-label { color: var(--listening); }
  .msg-bob .msg-label { color: var(--responding); }
  .msg-error .msg-label { color: var(--error); }
  .msg-text { color: var(--text); }
  /* Interim (not yet final) user transcription. */
  .msg-text.partial { color: var(--text-dim); font-style: italic; }
  .msg-text .interrupted { opacity: 0.5; }
  .msg-text .interrupted::after { content: " (interrupted)"; font-style: italic; font-size: 0.75rem; }

  /* ── Controls ──────────────────────────────────────── */
  .controls { flex-shrink: 0; display: flex; gap: 0.8rem; padding: 1rem 0 0.5rem; }
  .ctrl-btn {
    padding: 0.5rem 1.2rem; border-radius: 2rem; border: 1px solid #333;
    background: var(--surface); color: var(--text-dim); font-size: 0.78rem;
    cursor: pointer; transition: all 0.2s; user-select: none;
  }
  .ctrl-btn:hover { border-color: #555; color: var(--text); }
  .ctrl-btn:active { transform: scale(0.96); }
  .ctrl-btn.active { border-color: var(--error); color: var(--error); }
  .ctrl-btn.primary { border-color: var(--idle); color: var(--idle); }
  .ctrl-btn.primary:hover { border-color: var(--listening); color: var(--listening); background: #16213e; }
  .ctrl-btn.danger { border-color: var(--error); color: var(--error); }
159  
  /* ── Face avatar ─────────────────────────────────────── */
  /* Hidden by default; the view toggle switches data-view on the app root. */
  .face-container { display: none; width: 180px; height: 220px; position: relative; cursor: pointer; }
  [data-view="face"] .face-container { display: block; }
  [data-view="face"] .orb-container { display: none; }

  .face-svg { width: 100%; height: 100%; }
  .face-head { transition: fill 0.5s; }
  [data-state="idle"] .face-head { fill: url(#faceGradIdle); }
  [data-state="listening"] .face-head { fill: url(#faceGradListen); }
  [data-state="processing"] .face-head { fill: url(#faceGradProcess); }
  [data-state="responding"] .face-head { fill: url(#faceGradRespond); }
  [data-state="error"] .face-head { fill: url(#faceGradError); }
  [data-state="muted"] .face-head { fill: url(#faceGradMuted); }

  .face-eye { fill: #e0e8f0; transition: ry 0.1s; }
  .face-pupil { fill: #1a1a2e; transition: cx 0.3s, cy 0.3s; }
  /* NOTE(review): `transition: d` only interpolates when the path's `d` is
     set via the CSS property; if JS writes the `d` attribute instead, the
     brow change will snap — confirm how the brows are updated. */
  .face-brow { fill: none; stroke: #c8d8e8; stroke-width: 2.5; stroke-linecap: round; transition: d 0.3s; }
  .face-mouth-inner { fill: #1a1a2e; opacity: 0; }
  .face-mouth-lip { fill: none; stroke: #e8b4b8; stroke-width: 2; stroke-linecap: round; stroke-linejoin: round; }
  .face-teeth { fill: #e8e8f0; opacity: 0; }

  /* Blink animation (ry is an animatable SVG geometry property) */
  @keyframes blink { 0%,96%,100% { ry: 12px; } 97.5% { ry: 1px; } }
  .face-eye { animation: blink 4s ease-in-out infinite; }
  .face-eye:nth-of-type(2) { animation-delay: 0.15s; }

  /* State expressions */
  [data-state="listening"] .face-brow { stroke: var(--listening); }
  [data-state="processing"] .face-brow { stroke: var(--processing); }
  [data-state="responding"] .face-brow { stroke: var(--responding); }
  [data-state="error"] .face-eye { fill: #ffcccc; }

  .view-toggle { font-size: 0.7rem; color: var(--text-dim); cursor: pointer; margin-top: 0.3rem; border: none; background: none; text-decoration: underline; }
  .view-toggle:hover { color: var(--text); }

  /* ── Screen reader only ─────────────────────────────── */
  .sr-only { position: absolute; width: 1px; height: 1px; padding: 0; margin: -1px; overflow: hidden; clip: rect(0,0,0,0); white-space: nowrap; border: 0; }

  /* ── Scrollbar ──────────────────────────────────────── */
  .transcript-area::-webkit-scrollbar { width: 4px; }
  .transcript-area::-webkit-scrollbar-track { background: transparent; }
  .transcript-area::-webkit-scrollbar-thumb { background: #333; border-radius: 2px; }
202  </style>
203  </head>
204  <body>
205  
<div class="app" data-state="idle" id="app">
  <div class="header">
    <h1>Bob</h1>
    <div class="sub">Voice Assistant</div>
  </div>

  <div class="orb-area">
    <!-- Main tap target; data-state on #app drives its color/animation -->
    <div class="orb-container" id="orbContainer" role="button" tabindex="0" aria-label="Activate voice assistant">
      <div class="orb" id="orb"></div>
      <div class="volume-ring" id="volumeRing"></div>
    </div>

    <!-- Face avatar (hidden by default, toggled with view button) -->
    <div class="face-container" id="faceContainer">
      <svg class="face-svg" viewBox="0 0 180 220" xmlns="http://www.w3.org/2000/svg">
        <defs>
          <!-- One radial gradient per assistant state -->
          <radialGradient id="faceGradIdle" cx="40%" cy="35%"><stop offset="0%" stop-color="#6a8fc5"/><stop offset="100%" stop-color="#3a5a85"/></radialGradient>
          <radialGradient id="faceGradListen" cx="40%" cy="35%"><stop offset="0%" stop-color="#7ce0f8"/><stop offset="100%" stop-color="#2a9ab8"/></radialGradient>
          <radialGradient id="faceGradProcess" cx="40%" cy="35%"><stop offset="0%" stop-color="#9b88ee"/><stop offset="100%" stop-color="#5b48be"/></radialGradient>
          <radialGradient id="faceGradRespond" cx="40%" cy="35%"><stop offset="0%" stop-color="#4ee4d6"/><stop offset="100%" stop-color="#1ea496"/></radialGradient>
          <radialGradient id="faceGradError" cx="40%" cy="35%"><stop offset="0%" stop-color="#f06976"/><stop offset="100%" stop-color="#c02936"/></radialGradient>
          <radialGradient id="faceGradMuted" cx="40%" cy="35%"><stop offset="0%" stop-color="#666"/><stop offset="100%" stop-color="#333"/></radialGradient>
        </defs>
        <!-- Head -->
        <ellipse class="face-head" cx="90" cy="110" rx="75" ry="90"/>
        <!-- Eyes -->
        <ellipse class="face-eye" cx="62" cy="95" rx="10" ry="12"/>
        <ellipse class="face-eye" cx="118" cy="95" rx="10" ry="12"/>
        <!-- Pupils -->
        <circle class="face-pupil" id="pupilL" cx="62" cy="96" r="5"/>
        <circle class="face-pupil" id="pupilR" cx="118" cy="96" r="5"/>
        <!-- Eyebrows -->
        <path class="face-brow" id="browL" d="M 48 78 Q 62 72, 76 78"/>
        <path class="face-brow" id="browR" d="M 104 78 Q 118 72, 132 78"/>
        <!-- Mouth (paths are filled in by the lip-sync code) -->
        <path class="face-mouth-inner" id="mouthInner" d=""/>
        <path class="face-teeth" id="mouthTeeth" d=""/>
        <path class="face-mouth-lip" id="mouthLip" d="M 70 145 Q 90 148, 110 145"/>
      </svg>
    </div>

    <button class="view-toggle" id="viewToggle" type="button">Switch to face</button>
    <div class="status" id="statusText">Tap the orb to start</div>
  </div>

  <div class="transcript-area" id="transcriptArea">
    <div class="transcript" id="transcript"></div>
  </div>

  <div class="controls">
    <!-- type="button" so these never act as implicit submit buttons -->
    <button class="ctrl-btn primary" id="connectBtn" type="button">Connect</button>
    <button class="ctrl-btn" id="muteBtn" type="button" style="display:none">Mute</button>
    <button class="ctrl-btn danger" id="disconnectBtn" type="button" style="display:none">Disconnect</button>
    <a href="/enroll.html" class="ctrl-btn" style="text-decoration:none;display:inline-block">Enroll Voice</a>
  </div>

  <!-- Accessibility: live region for state announcements -->
  <div aria-live="assertive" aria-atomic="true" class="sr-only" id="a11yAnnounce"></div>
</div>
265  
266  <script>
// ── Configuration ──────────────────────────────────────
// Production hosts go through the TLS reverse proxy at /ws; anywhere else
// we talk to the agent's raw WebSocket port directly.
const WS_URL = location.hostname.endsWith('.genexergy.org')
  ? `wss://${location.hostname}/ws`
  : `ws://${location.hostname}:10700`;
const SAMPLE_RATE_IN = 16000;   // mic capture rate (matches STT)
const SAMPLE_RATE_OUT = 24000;  // TTS playback rate (matches Kokoro)

// ── State ──────────────────────────────────────────────
let state = 'idle'; // idle | dormant | wakeup | listening | processing | responding | error | muted
let ws = null, audioCtx = null, mediaStream = null, processor = null; // audioCtx is the 24 kHz OUTPUT context; the 16 kHz input context is created inside connect()
let analyser = null, analyserData = null;  // mic FFT used by the orb visualization
let isConnected = false, isMuted = false;
let micLevel = 0, outputLevel = 0;         // smoothed levels driving orb scale/glow
let rafId = null;                          // requestAnimationFrame handle for the viz loop
let audioQueue = [], isPlaying = false;
let errorStrikes = 0;                      // count of processing-watchdog timeouts
let silenceTimer = null;                   // watchdog armed while in 'processing'
let wakeWordGateActive = false; // true when wake word gate is in use (dormant/wakeup states)

// ── DOM refs ───────────────────────────────────────────
const app = document.getElementById('app');
const orb = document.getElementById('orb');
const volumeRing = document.getElementById('volumeRing');
const statusText = document.getElementById('statusText');
const transcript = document.getElementById('transcript');
const transcriptArea = document.getElementById('transcriptArea');
const connectBtn = document.getElementById('connectBtn');
const muteBtn = document.getElementById('muteBtn');
const disconnectBtn = document.getElementById('disconnectBtn');
const a11y = document.getElementById('a11yAnnounce');
const orbContainer = document.getElementById('orbContainer');
298  
// ── Earcons (generated tones) ──────────────────────────
// Short audio cues played on state transitions; each entry defers to
// playTone(freqs, durationSeconds, peakGain).
const earcons = {
  listen: () => playTone([523, 659], 0.15, 0.08),      // C5→E5 ascending
  wakeup: () => playTone([440, 554, 659], 0.12, 0.1),  // A4→C#5→E5 major triad — wake word recognized
  process: () => playTone([440], 0.05, 0.04),           // A4 click
  error: () => playTone([349, 311], 0.25, 0.1),         // F4→Eb4 descending minor
  complete: () => playTone([392, 523], 0.2, 0.06),      // G4→C5 resolution
  mute: () => playTone([330], 0.08, 0.05),              // E4 click
};
308  
/**
 * Play a short sequence of sine notes on the output context.
 * Notes are staggered by 60% of `dur` so consecutive tones overlap slightly.
 * @param {number[]} freqs frequencies in Hz, played in order
 * @param {number} dur per-note duration in seconds
 * @param {number} vol peak gain for each note
 */
function playTone(freqs, dur, vol) {
  if (!audioCtx) return;
  const step = dur * 0.6; // onset spacing between consecutive notes
  freqs.forEach((freq, i) => {
    const startAt = audioCtx.currentTime + i * step;
    const osc = audioCtx.createOscillator();
    const gain = audioCtx.createGain();
    osc.type = 'sine';
    osc.frequency.value = freq;
    // Quick linear attack to `vol`, then an exponential decay to silence.
    gain.gain.setValueAtTime(0, startAt);
    gain.gain.linearRampToValueAtTime(vol, startAt + 0.01);
    gain.gain.exponentialRampToValueAtTime(0.001, startAt + step + dur);
    osc.connect(gain).connect(audioCtx.destination);
    osc.start(startAt);
    osc.stop(startAt + step + dur + 0.05);
  });
}
324  
// ── State Machine ──────────────────────────────────────
/**
 * Transition the UI state machine and sync everything that hangs off it:
 * the data-state attribute (drives CSS), the status line, the aria-live
 * announcement, earcons, error auto-recovery, and the processing watchdog.
 * @param {string} newState one of idle|dormant|wakeup|listening|processing|responding|error|muted
 * @param {string} [statusMsg] optional override for the status line
 */
function setState(newState, statusMsg) {
  if (state === newState && !statusMsg) return;
  const prev = state;
  state = newState;
  app.dataset.state = newState;

  // Status text ('wakeup' was previously missing, leaving the line blank)
  const messages = {
    idle: isConnected ? 'Ready — speak anytime' : 'Tap the orb to start',
    dormant: 'Say "Hey Bob" to wake me up',
    wakeup: 'Wake word detected',
    listening: 'Listening...',
    processing: 'Thinking...',
    responding: 'Speaking...',
    error: statusMsg || 'Something went wrong',
    muted: 'Microphone muted',
  };
  statusText.textContent = statusMsg || messages[newState] || '';

  // A11y announcement
  const announcements = {
    idle: 'Bob is ready',
    dormant: 'Say Hey Bob to activate',
    wakeup: 'Wake word detected',
    listening: 'Bob is listening',
    processing: 'Processing your request',
    responding: 'Bob is responding',
    error: statusMsg || 'Error occurred',
    muted: 'Microphone muted',
  };
  a11y.textContent = announcements[newState] || '';

  // Earcons (skip the listen tone right after the wakeup chime to avoid doubling)
  if (newState === 'wakeup') earcons.wakeup();
  if (newState === 'listening' && prev !== 'listening' && prev !== 'wakeup') earcons.listen();
  if (newState === 'processing') earcons.process();
  if (newState === 'error') earcons.error();
  if (newState === 'idle' && prev === 'responding') earcons.complete();
  if (newState === 'dormant' && prev === 'responding') earcons.complete();
  if (newState === 'muted' || (prev === 'muted' && newState !== 'muted')) earcons.mute();

  // Auto-recover from error after 4 s unless something else transitioned first
  if (newState === 'error') {
    setTimeout(() => { if (state === 'error') setState('idle'); }, 4000);
  }

  // Watchdog: surface an error if the backend stays silent while processing
  clearTimeout(silenceTimer);
  if (newState === 'processing') {
    silenceTimer = setTimeout(() => {
      if (state === 'processing') {
        setState('error', "This is taking too long. Please try again.");
        errorStrikes++;
      }
    }, 15000);
  }
}
381  
// ── Audio Visualization Loop ───────────────────────────
// Drives the orb's scale/glow and the volume ring from the mic analyser
// (while listening/idle) or from the TTS output level (while responding).
function startVisualization() {
  if (rafId) return; // loop already running

  const frame = () => {
    rafId = requestAnimationFrame(frame);

    // Smooth the mic level toward the current FFT average, or decay it
    // when the analyser isn't relevant for the current state.
    const micActive = analyser && analyserData && (state === 'listening' || state === 'idle');
    if (micActive) {
      analyser.getByteFrequencyData(analyserData);
      let total = 0;
      for (let i = 0; i < analyserData.length; i++) total += analyserData[i];
      const target = total / analyserData.length / 255;
      micLevel += (target - micLevel) * 0.15;
    } else {
      micLevel *= 0.9;
    }

    // Map the active level onto orb scale and glow intensity.
    let scale = 1;
    let glow = 0.25;
    if (state === 'listening') {
      scale = 1 + micLevel * 0.3;
      glow = 0.3 + micLevel * 0.5;
    } else if (state === 'responding') {
      scale = 1 + outputLevel * 0.2;
      glow = 0.25 + outputLevel * 0.45;
    }

    const animating = state === 'listening' || state === 'responding';
    if (animating) {
      orb.style.transform = `scale(${scale})`;
      orb.style.boxShadow = `0 0 ${40 + glow * 60}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 50)}%, transparent), 0 0 ${80 + glow * 40}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 25)}%, transparent)`;
      volumeRing.style.opacity = String(0.3 + micLevel * 0.7);
      volumeRing.style.transform = `scale(${1 + micLevel * 0.08})`;
    } else {
      // Clear inline overrides so the stylesheet's state rules take over.
      orb.style.transform = '';
      orb.style.boxShadow = '';
      volumeRing.style.opacity = '';
      volumeRing.style.transform = '';
    }
  };

  frame();
}
423  
// Halt the rAF visualization loop; safe to call when it isn't running.
function stopVisualization() {
  if (!rafId) return;
  cancelAnimationFrame(rafId);
  rafId = null;
}
427  
428  // ── Transcript ─────────────────────────────────────────
429  let currentUserMsg = null, currentBobMsg = null;
430  
// Split an optional leading "[Speaker]" diarization tag off transcript text,
// e.g. "[Cam] tell me a joke" → { speaker: "Cam", text: "tell me a joke" }.
// Returns { speaker: null, text } unchanged when no tag is present.
function parseSpeakerTag(text) {
  const m = /^\[([^\]]+)\]\s*(.*)/s.exec(text);
  return m ? { speaker: m[1], text: m[2] } : { speaker: null, text };
}
437  
/**
 * Append or update a transcript entry and keep the newest message in view.
 * @param {string} who 'user' | 'bob' | 'error'
 * @param {string} text message text; user text may carry a "[Speaker]" tag
 * @param {boolean} [isPartial] true for interim user transcriptions that
 *        are overwritten in place until the final transcript arrives
 */
function addMessage(who, text, isPartial = false) {
  if (who === 'user') {
    // Pull the diarization speaker tag out of the text, if present.
    const parsed = parseSpeakerTag(text);
    const label = parsed.speaker || 'You';
    // Only strip the tag when one actually matched; the old `parsed.text || text`
    // fallback re-displayed the raw "[Name]" tag when the remaining text was empty.
    if (parsed.speaker !== null) text = parsed.text;

    if (isPartial) {
      if (!currentUserMsg) {
        currentUserMsg = createMsgElement(label, 'msg-user');
        transcript.appendChild(currentUserMsg);
      }
      currentUserMsg.querySelector('.msg-text').textContent = text;
      currentUserMsg.querySelector('.msg-label').textContent = label;
      currentUserMsg.querySelector('.msg-text').classList.add('partial');
    } else {
      if (currentUserMsg) {
        // Finalize the in-flight partial entry in place.
        currentUserMsg.querySelector('.msg-text').textContent = text;
        currentUserMsg.querySelector('.msg-label').textContent = label;
        currentUserMsg.querySelector('.msg-text').classList.remove('partial');
        currentUserMsg = null;
      } else {
        const el = createMsgElement(label, 'msg-user');
        el.querySelector('.msg-text').textContent = text;
        transcript.appendChild(el);
      }
    }
  } else if (who === 'bob') {
    // Bob's reply streams in chunks; append to the open message element.
    if (!currentBobMsg) {
      currentBobMsg = createMsgElement('Bob', 'msg-bob');
      transcript.appendChild(currentBobMsg);
    }
    currentBobMsg.querySelector('.msg-text').textContent += text;
  } else if (who === 'error') {
    const el = createMsgElement('', 'msg-error');
    el.querySelector('.msg-text').textContent = text;
    transcript.appendChild(el);
  }
  transcriptArea.scrollTop = transcriptArea.scrollHeight;
}
479  
// Mark the current streaming Bob message as complete so the next
// bot text chunk starts a fresh transcript entry.
function finalizeBobMsg() {
  currentBobMsg = null;
}

/**
 * Build an empty transcript entry. The label is inserted with textContent
 * rather than interpolated into innerHTML, so speaker names derived from
 * server transcripts cannot inject markup.
 * @param {string} label speaker label shown above the message
 * @param {string} cls extra class: msg-user | msg-bob | msg-error
 * @returns {HTMLDivElement} element containing .msg-label and .msg-text
 */
function createMsgElement(label, cls) {
  const div = document.createElement('div');
  div.className = `msg ${cls}`;
  const labelEl = document.createElement('div');
  labelEl.className = 'msg-label';
  labelEl.textContent = label;
  const textEl = document.createElement('div');
  textEl.className = 'msg-text';
  div.append(labelEl, textEl);
  return div;
}
490  
// ── Audio Playback (gapless scheduled) ─────────────────
let nextPlayTime = 0;      // audioCtx time at which the next chunk should start
let playEndTimer = null;   // flips isPlaying off once the queue has drained
let activeSources = [];    // scheduled-but-unfinished sources, so barge-in can cut them off

/**
 * Decode a raw s16le mono chunk and schedule it gaplessly after the
 * previously queued chunk. Also updates outputLevel (RMS) for the orb.
 * @param {ArrayBuffer} buffer little-endian 16-bit PCM at SAMPLE_RATE_OUT
 */
function queueAudio(buffer) {
  if (!audioCtx) return;
  const view = new DataView(buffer);
  const samples = new Float32Array(buffer.byteLength / 2);
  if (samples.length === 0) return; // guard: empty chunk would yield NaN RMS
  let rms = 0;
  for (let i = 0; i < samples.length; i++) {
    samples[i] = view.getInt16(i * 2, true) / 32768; // s16le → [-1, 1)
    rms += samples[i] * samples[i];
  }
  outputLevel = Math.sqrt(rms / samples.length) * 3; // drives orb pulse while responding
  isPlaying = true;

  const ab = audioCtx.createBuffer(1, samples.length, SAMPLE_RATE_OUT);
  ab.copyToChannel(samples, 0);
  const src = audioCtx.createBufferSource();
  src.buffer = ab;
  src.connect(audioCtx.destination);

  // Schedule gaplessly: each chunk starts exactly when the previous ends
  const now = audioCtx.currentTime;
  if (nextPlayTime < now) nextPlayTime = now;
  src.start(nextPlayTime);
  nextPlayTime += ab.duration;

  // Track the source so stopPlayback() can actually silence it on barge-in.
  activeSources.push(src);
  src.onended = () => { activeSources = activeSources.filter(s => s !== src); };

  // Set a timer to detect when all audio has finished
  clearTimeout(playEndTimer);
  playEndTimer = setTimeout(() => {
    if (audioCtx && audioCtx.currentTime >= nextPlayTime - 0.05) {
      isPlaying = false;
      outputLevel = 0;
    }
  }, (nextPlayTime - now) * 1000 + 100);
}

/**
 * Immediately stop all scheduled TTS audio and reset playback bookkeeping.
 * Previously this only reset counters, so already-scheduled buffer sources
 * kept playing through a user interruption.
 */
function stopPlayback() {
  for (const src of activeSources) {
    try { src.stop(); } catch (e) { /* already stopped/finished */ }
  }
  activeSources = [];
  nextPlayTime = 0;
  isPlaying = false;
  outputLevel = 0;
  clearTimeout(playEndTimer);
}
535  
// ── WebSocket Connection ───────────────────────────────
/**
 * Open the microphone, build the audio graph (16 kHz capture → PCM worklet
 * for the socket; separate 24 kHz context for TTS playback), and connect
 * the WebSocket to the agent. Updates buttons and the state machine.
 */
async function connect() {
  connectBtn.style.display = 'none';
  setState('idle', 'Connecting...');

  // True while inside a <think>…</think> span that must be hidden from the
  // transcript (was a stray `window._inThink` global; now per-connection).
  let inThink = false;

  try {
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: { sampleRate: SAMPLE_RATE_IN, channelCount: 1, echoCancellation: true, noiseSuppression: true }
    });

    // Separate contexts: input at 16kHz for STT/wake word, output at 24kHz for TTS playback
    const inputCtx = new AudioContext({ sampleRate: SAMPLE_RATE_IN });
    audioCtx = new AudioContext({ sampleRate: SAMPLE_RATE_OUT });
    const source = inputCtx.createMediaStreamSource(mediaStream);

    // Analyser for visualization (on input context)
    analyser = inputCtx.createAnalyser();
    analyser.fftSize = 256;
    analyserData = new Uint8Array(analyser.frequencyBinCount);
    source.connect(analyser);

    // PCM worklet: converts float frames to s16le and posts them for the socket
    await inputCtx.audioWorklet.addModule(URL.createObjectURL(new Blob([`
      class PCMProcessor extends AudioWorkletProcessor {
        process(inputs) {
          const input = inputs[0][0];
          if (input) {
            const pcm16 = new Int16Array(input.length);
            for (let i = 0; i < input.length; i++) {
              pcm16[i] = Math.max(-32768, Math.min(32767, Math.round(input[i] * 32767)));
            }
            this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
          }
          return true;
        }
      }
      registerProcessor('pcm-processor', PCMProcessor);
    `], { type: 'application/javascript' })));

    processor = new AudioWorkletNode(inputCtx, 'pcm-processor');
    source.connect(processor);

    // WebSocket
    ws = new WebSocket(WS_URL);
    ws.binaryType = 'arraybuffer';

    ws.onopen = () => {
      isConnected = true;
      errorStrikes = 0;
      wakeWordGateActive = false; // Will be set true if server sends wake-word-state
      muteBtn.style.display = '';
      disconnectBtn.style.display = '';
      setState('idle', 'Connected — waiting for wake word status...');
      startVisualization();
    };

    ws.onmessage = (event) => {
      if (event.data instanceof ArrayBuffer) {
        // Binary frames are raw TTS audio
        if (state !== 'responding') setState('responding');
        queueAudio(event.data);
      } else {
        try {
          const msg = JSON.parse(event.data);
          if (msg.label || msg.type) console.log('WS:', JSON.stringify(msg).slice(0, 150));
          // RTVI protocol messages (label: "rtvi-ai")
          if (msg.label === 'rtvi-ai') {
            const t = msg.type;
            const d = msg.data || {};
            if (t === 'wake-word-state') {
              wakeWordGateActive = true;
              if (d.state === 'waiting') {
                setState('dormant');
              } else if (d.state === 'activated') {
                setState('wakeup');
                setTimeout(() => { if (state === 'wakeup') setState('listening'); }, 700);
              }
            } else if (t === 'user-transcription') {
              addMessage('user', d.text, false);
              setState('processing');
            } else if (t === 'user-started-speaking') {
              // Only transition to listening if wake word gate is not active (or already past dormant)
              if (!wakeWordGateActive || (state !== 'dormant' && state !== 'idle')) {
                stopPlayback();
                setState('listening');
              }
            } else if (t === 'bot-llm-text' && d.text) {
              // Strip <think> reasoning spans from the visible transcript
              let txt = d.text;
              if (txt.includes('<think>')) { inThink = true; txt = txt.replace(/<think>[\s\S]*/, ''); }
              if (txt.includes('</think>')) { inThink = false; txt = txt.replace(/[\s\S]*<\/think>/, ''); }
              if (inThink) txt = '';
              txt = txt.replace(/<think>[\s\S]*?<\/think>/g, '');
              if (txt) addMessage('bob', txt);
            } else if (t === 'bot-llm-started') {
              setState('processing');
            } else if (t === 'bot-tts-started') {
              setState('responding');
            } else if (t === 'bot-tts-stopped') {
              finalizeBobMsg();
              setState('listening');
            }
          }
          // Direct Pipecat frame messages (no label)
          else if (msg.type === 'UserTranscriptionFrame' || msg.type === 'transcript') {
            addMessage('user', msg.text, false);
            setState('processing');
          } else if (msg.type === 'InterimTranscriptionFrame') {
            addMessage('user', msg.text, true);
          } else if (msg.type === 'TextFrame' || msg.type === 'bot_transcript') {
            addMessage('bob', msg.text);
          } else if (msg.type === 'BotStartedSpeakingFrame') {
            setState('responding');
          } else if (msg.type === 'BotStoppedSpeakingFrame') {
            finalizeBobMsg();
            setState('listening');
          } else if (msg.type === 'UserStartedSpeakingFrame') {
            audioQueue = []; isPlaying = false;
            setState('listening');
          }
        } catch (e) { /* non-JSON text frames are ignored */ }
      }
    };

    ws.onclose = () => { disconnect(); };
    ws.onerror = () => {
      // Detach onclose first: the close event that follows an error would
      // re-enter disconnect() and overwrite the error status set below.
      if (ws) ws.onclose = null;
      disconnect();
      setState('error', 'Connection lost');
    };

    processor.port.onmessage = (e) => {
      if (ws && ws.readyState === WebSocket.OPEN && !isMuted) {
        ws.send(e.data);
      }
    };

  } catch (err) {
    // Distinguish a mic-permission failure from other setup failures;
    // previously every error was reported as "Microphone access denied".
    const denied = err && (err.name === 'NotAllowedError' || err.name === 'SecurityError');
    setState('error', denied
      ? 'Microphone access denied'
      : 'Audio setup failed: ' + (err && err.message ? err.message : err));
    connectBtn.style.display = '';
  }
}
676  
/**
 * Tear down the session: socket, audio graph, mic, and UI state.
 * Safe to call repeatedly (ws.onclose also routes here).
 */
function disconnect() {
  stopVisualization();
  if (ws) {
    // Detach handlers first: ws.close() fires onclose asynchronously, which
    // would re-enter disconnect() and clobber any status message.
    ws.onclose = null;
    ws.onerror = null;
    ws.onmessage = null;
    ws.close();
    ws = null;
  }
  if (processor) {
    const inputCtx = processor.context; // the 16 kHz capture context created in connect()
    processor.port.onmessage = null;
    processor.disconnect();
    processor = null;
    // Previously leaked: only the output context was ever closed.
    inputCtx.close().catch(() => {});
  }
  analyser = null; analyserData = null;
  if (audioCtx) { audioCtx.close().catch(() => {}); audioCtx = null; }
  if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null; }
  isConnected = false; isMuted = false; wakeWordGateActive = false;
  audioQueue = []; isPlaying = false;
  // Reset the mute button so it doesn't read "Unmute" after a muted disconnect.
  muteBtn.textContent = 'Mute';
  muteBtn.classList.remove('active');
  muteBtn.style.display = 'none';
  disconnectBtn.style.display = 'none';
  connectBtn.style.display = '';
  setState('idle', 'Disconnected. Tap to reconnect.');
}
691  
// Flip the mic mute flag and keep the mute button and orb state in sync.
function toggleMute() {
  isMuted = !isMuted;
  muteBtn.textContent = isMuted ? 'Unmute' : 'Mute';
  muteBtn.classList.toggle('active', isMuted);
  setState(isMuted ? 'muted' : 'listening');
}
702  
// ── Event Listeners ────────────────────────────────────
connectBtn.addEventListener('click', connect);
disconnectBtn.addEventListener('click', disconnect);
muteBtn.addEventListener('click', toggleMute);

// Tapping the orb: connect when offline, unmute when muted,
// otherwise wake it from idle.
orbContainer.addEventListener('click', () => {
  if (!isConnected) { connect(); return; }
  if (isMuted) { toggleMute(); return; }
  if (state === 'idle') setState('listening');
});

// Keyboard shortcuts: Space = connect/wake, Escape = disconnect, M = mute.
document.addEventListener('keydown', (e) => {
  // Ignore keystrokes while typing in a form field or editable region.
  if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA' || e.target.isContentEditable) return;
  // Leave browser/OS shortcuts (Ctrl/Cmd/Alt combos, e.g. Cmd+M) alone.
  if (e.ctrlKey || e.metaKey || e.altKey) return;
  if (e.code === 'Space') { e.preventDefault(); if (!isConnected) connect(); else if (state === 'idle') setState('listening'); }
  if (e.code === 'Escape') { if (isConnected) disconnect(); }
  if (e.code === 'KeyM') { if (isConnected) toggleMute(); }
});
721  
// ── Face Avatar ────────────────────────────────────────
// DOM handles for the SVG face view. Shared by the animation loop
// (mouth/brow rendering) and the pupil-tracking handler below.
const viewToggle = document.getElementById('viewToggle');       // button that swaps orb <-> face
const faceContainer = document.getElementById('faceContainer'); // clickable face wrapper
const mouthLip = document.getElementById('mouthLip');           // outer lip path
const mouthInner = document.getElementById('mouthInner');       // dark mouth-interior path
const mouthTeeth = document.getElementById('mouthTeeth');       // teeth path (visible when mouth is open)
const browL = document.getElementById('browL');                 // left eyebrow path
const browR = document.getElementById('browR');                 // right eyebrow path
const pupilL = document.getElementById('pupilL');               // left pupil circle
const pupilR = document.getElementById('pupilR');               // right pupil circle
let currentView = 'orb'; // 'orb' | 'face' — which avatar is currently shown
733  
// Swap between the orb and face avatars and relabel the toggle button.
viewToggle.addEventListener('click', () => {
  const next = currentView === 'orb' ? 'face' : 'orb';
  currentView = next;
  app.dataset.view = next;
  viewToggle.textContent = next === 'face' ? 'Switch to orb' : 'Switch to face';
});
739  
// Clicking the face mirrors the orb: connect when offline, unmute when muted.
faceContainer.addEventListener('click', () => {
  if (!isConnected) {
    connect();
  } else if (isMuted) {
    toggleMute();
  }
});
744  
// Pupil tracking: nudge both pupils toward the cursor, clamped to stay
// inside the eyes. Only active while the face view is shown.
document.addEventListener('mousemove', (e) => {
  if (currentView !== 'face') return;
  const clamp = (v, lo, hi) => Math.min(hi, Math.max(lo, v));
  const rect = faceContainer.getBoundingClientRect();
  // Eye line sits at ~43% of the face height.
  const offsetX = (e.clientX - (rect.left + rect.width / 2)) / window.innerWidth * 4;
  const offsetY = (e.clientY - (rect.top + rect.height * 0.43)) / window.innerHeight * 3;
  const dx = clamp(offsetX, -3, 3);
  const dy = clamp(offsetY, -2, 2);
  pupilL.setAttribute('cx', 62 + dx);
  pupilL.setAttribute('cy', 96 + dy);
  pupilR.setAttribute('cx', 118 + dx);
  pupilR.setAttribute('cy', 96 + dy);
});
759  
// Mouth shape driven by output audio level
// Smoothed viseme parameters, eased toward targets each animation frame.
let mouthOpen = 0, mouthWidth = 0.5, mouthRound = 0;
// Output audio analyser (connect during playback)
// Lazily created in initOutputAnalyser() once an AudioContext exists.
let outAnalyser = null, outAnalyserData = null;
764  
// Lazily create the output-side analyser used for face viseme detection.
// No-op when it already exists or when there is no audio context yet.
function initOutputAnalyser() {
  if (outAnalyser || !audioCtx) return;
  const node = audioCtx.createAnalyser();
  node.fftSize = 256;
  outAnalyser = node;
  outAnalyserData = new Uint8Array(node.frequencyBinCount);
}
771  
// Replace the base queueAudio with a version that also:
//  1. computes an RMS output level (drives orb scale + mouth openness), and
//  2. routes playback through an analyser so the face view can derive
//     mouth shapes (visemes) from the output spectrum.
// Same contract as before: takes an ArrayBuffer of 16-bit little-endian
// PCM at SAMPLE_RATE_OUT and schedules it gaplessly.
// (The original implementation is fully replaced; it used to be saved in an
// `_origQueueAudio` const that was never called, now removed.)
queueAudio = function(buffer) {
  if (!audioCtx) return;
  // Guard: PCM16 needs an even, non-zero byte count. A zero-length buffer
  // would make the RMS below divide by zero (NaN outputLevel), and an odd
  // length would produce a fractional Float32Array size (RangeError).
  if (!buffer || buffer.byteLength < 2) return;
  initOutputAnalyser();

  const view = new DataView(buffer);
  const sampleCount = Math.floor(buffer.byteLength / 2);
  const samples = new Float32Array(sampleCount);
  let sumSquares = 0;
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = view.getInt16(i * 2, true) / 32768; // PCM16 -> [-1, 1)
    sumSquares += samples[i] * samples[i];
  }
  // x3 boost so normal speech levels produce visible animation.
  outputLevel = Math.sqrt(sumSquares / sampleCount) * 3;
  isPlaying = true;

  const ab = audioCtx.createBuffer(1, sampleCount, SAMPLE_RATE_OUT);
  ab.copyToChannel(samples, 0);
  const src = audioCtx.createBufferSource();
  src.buffer = ab;

  // Route through the analyser when available so the face can read the
  // spectrum. Repeated connect() calls between the same pair of nodes are
  // collapsed to one connection by the Web Audio API, so doing this per
  // chunk is safe.
  if (outAnalyser) {
    src.connect(outAnalyser);
    outAnalyser.connect(audioCtx.destination);
  } else {
    src.connect(audioCtx.destination);
  }

  // Schedule back-to-back with previously queued chunks for gapless playback.
  const now = audioCtx.currentTime;
  if (nextPlayTime < now) nextPlayTime = now;
  src.start(nextPlayTime);
  nextPlayTime += ab.duration;

  // Mark playback finished shortly after the last scheduled chunk ends.
  clearTimeout(playEndTimer);
  playEndTimer = setTimeout(() => {
    if (audioCtx && audioCtx.currentTime >= nextPlayTime - 0.05) {
      isPlaying = false;
      outputLevel = 0;
    }
  }, (nextPlayTime - now) * 1000 + 100);
};
814  
// Replace startVisualization with a loop that animates the orb AND, when
// the face view is active, drives mouth/brow rendering from the analysers.
// (The previous implementation is fully replaced; it used to be saved in an
// `origTick` const that was never called, now removed.)
startVisualization = function() {
  if (rafId) return; // loop already running

  // Smooth the mic level toward the current input-analyser reading
  // (only while it matters), decaying it otherwise.
  function updateMicLevel() {
    if (analyser && analyserData && (state === 'listening' || state === 'idle')) {
      analyser.getByteFrequencyData(analyserData);
      let sum = 0;
      for (let i = 0; i < analyserData.length; i++) sum += analyserData[i];
      const raw = sum / analyserData.length / 255;
      micLevel += (raw - micLevel) * 0.15; // low-pass toward target
    } else {
      micLevel *= 0.9; // decay when not listening
    }
  }

  // Scale/glow the orb with mic level (listening) or output level (responding).
  function renderOrb() {
    let scale = 1, glow = 0.25;
    if (state === 'listening') {
      scale = 1 + micLevel * 0.3;
      glow = 0.3 + micLevel * 0.5;
    } else if (state === 'responding') {
      scale = 1 + outputLevel * 0.2;
      glow = 0.25 + outputLevel * 0.45;
    }
    if (state === 'listening' || state === 'responding') {
      orb.style.transform = `scale(${scale})`;
      orb.style.boxShadow = `0 0 ${40 + glow * 60}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 50)}%, transparent), 0 0 ${80 + glow * 40}px color-mix(in srgb, var(--state-color) ${Math.round(glow * 25)}%, transparent)`;
      volumeRing.style.opacity = (0.3 + micLevel * 0.7).toString();
      volumeRing.style.transform = `scale(${1 + micLevel * 0.08})`;
    } else {
      // Fall back to the stylesheet's static appearance.
      orb.style.transform = '';
      orb.style.boxShadow = '';
      volumeRing.style.opacity = '';
      volumeRing.style.transform = '';
    }
  }

  // Derive viseme targets from the output spectrum and ease the mouth
  // parameters toward them; ease back to rest when not speaking.
  function updateFace() {
    if (outAnalyser && outAnalyserData && state === 'responding') {
      outAnalyser.getByteFrequencyData(outAnalyserData);
      const bins = outAnalyserData.length;
      // Low band (bottom ~15%): rounded vowels (O/U).
      let low = 0; for (let i = 0; i < bins * 0.15; i++) low += outAnalyserData[i];
      low = low / (bins * 0.15) / 255;
      // Mid band (15–40%): open vowels (A/E).
      let mid = 0; for (let i = Math.floor(bins * 0.15); i < bins * 0.4; i++) mid += outAnalyserData[i];
      mid = mid / (bins * 0.25) / 255;
      // High band (40–70%): consonants.
      let high = 0; for (let i = Math.floor(bins * 0.4); i < bins * 0.7; i++) high += outAnalyserData[i];
      high = high / (bins * 0.3) / 255;

      const targetOpen = Math.min(1, (low * 0.7 + mid * 1.0 + high * 0.3) * 2.5);
      const targetWidth = 0.3 + mid * 0.5 + high * 0.3;
      const targetRound = low * 0.6;
      mouthOpen += (targetOpen - mouthOpen) * 0.25;
      mouthWidth += (targetWidth - mouthWidth) * 0.2;
      mouthRound += (targetRound - mouthRound) * 0.2;
    } else {
      // Return to rest pose.
      mouthOpen += (0 - mouthOpen) * 0.15;
      mouthWidth += (0.5 - mouthWidth) * 0.1;
      mouthRound += (0 - mouthRound) * 0.1;
    }
    renderMouth(mouthOpen, mouthWidth, mouthRound);
    renderBrows();
  }

  function tick() {
    rafId = requestAnimationFrame(tick);
    updateMicLevel();
    renderOrb();
    if (currentView === 'face') updateFace();
  }
  tick();
};
888  
// Draw the mouth from three eased parameters:
//   open  – vertical openness (0..1)
//   width – horizontal stretch (0..1)
//   round – lip rounding; narrows the mouth and rounds the curves (0..1)
function renderMouth(open, width, round) {
  const cX = 90, cY = 145;
  const halfW = 18 + 14 * width * (1 - round * 0.4);
  const halfH = Math.max(0.5, 22 * open);
  const cpX = halfW * (0.6 + round * 0.3);
  const cpY = halfH * (0.8 + open * 0.2);

  // Outer lips: mirrored top/bottom cubic curves sharing the same corners.
  mouthLip.setAttribute('d', `M ${cX - halfW} ${cY} C ${cX - cpX} ${cY - cpY}, ${cX + cpX} ${cY - cpY}, ${cX + halfW} ${cY} M ${cX - halfW} ${cY} C ${cX - cpX} ${cY + cpY}, ${cX + cpX} ${cY + cpY}, ${cX + halfW} ${cY}`);

  if (open <= 0.05) {
    // Mouth effectively closed: hide the cavity and teeth.
    mouthInner.setAttribute('opacity', '0');
    mouthTeeth.setAttribute('opacity', '0');
    return;
  }

  const s = 0.8; // inner cavity is a scaled-down copy of the lip outline
  mouthInner.setAttribute('d', `M ${cX - halfW * s} ${cY} C ${cX - cpX * s} ${cY - cpY * s}, ${cX + cpX * s} ${cY - cpY * s}, ${cX + halfW * s} ${cY} C ${cX + cpX * s} ${cY + cpY * s}, ${cX - cpX * s} ${cY + cpY * s}, ${cX - halfW * s} ${cY} Z`);
  mouthInner.setAttribute('opacity', Math.min(1, open * 3));

  if (open > 0.15) {
    // Teeth: a shallow band hung just below the upper lip.
    const toothW = halfW * 0.6;
    const toothH = Math.min(halfH * 0.25, 4);
    const toothY = cY - halfH * 0.25;
    mouthTeeth.setAttribute('d', `M ${cX - toothW} ${toothY} Q ${cX} ${toothY + toothH}, ${cX + toothW} ${toothY} L ${cX + toothW} ${toothY + toothH * 0.5} Q ${cX} ${toothY + toothH * 1.2}, ${cX - toothW} ${toothY + toothH * 0.5} Z`);
    mouthTeeth.setAttribute('opacity', Math.min(0.8, open * 1.5));
  } else {
    mouthTeeth.setAttribute('opacity', '0');
  }
}
917  
// Pose the eyebrows from the assistant state: raised while listening,
// lowered on error, pinched inward (furrowed) while processing.
function renderBrows() {
  let raised = 0;
  if (state === 'listening') raised = 4;
  else if (state === 'error') raised = -3;
  const furrow = state === 'processing' ? 3 : 0;
  const baseY = 78 - raised;
  browL.setAttribute('d', `M 48 ${baseY + furrow} Q 62 ${72 - raised}, 76 ${baseY - furrow}`);
  browR.setAttribute('d', `M 104 ${baseY - furrow} Q 118 ${72 - raised}, 132 ${baseY + furrow}`);
}
924  </script>
925  </body>
926  </html>