// src/modules/ZoomIt/ZoomIt/AudioSampleGenerator.cpp
  1  #include "pch.h"
  2  #include "AudioSampleGenerator.h"
  3  #include "CaptureFrameWait.h"
  4  #include "LoopbackCapture.h"
  5  #include <wrl/client.h>
  6  
  7  extern TCHAR g_MicrophoneDeviceId[];
  8  
namespace
{
    // Declare the IMemoryBufferByteAccess interface for accessing raw buffer data
    // behind an IMemoryBufferReference (normally obtained via QueryInterface on a
    // locked AudioBuffer; see robuffer.h / Windows.Foundation documentation).
    // NOTE(review): nothing in this file currently queries for this interface
    // (samples are copied with Buffer::CreateCopyFromMemoryBuffer) -- confirm it
    // is still needed before removing.
    MIDL_INTERFACE("5b0d3235-4dba-4d44-8657-1f1d0f83e9a3")
    IMemoryBufferByteAccess : public IUnknown
    {
    public:
        virtual HRESULT STDMETHODCALLTYPE GetBuffer(
            BYTE** value,
            UINT32* capacity) = 0;
    };
}
 21  
 22  namespace winrt
 23  {
 24      using namespace Windows::Foundation;
 25      using namespace Windows::Storage;
 26      using namespace Windows::Storage::Streams;
 27      using namespace Windows::Media;
 28      using namespace Windows::Media::Audio;
 29      using namespace Windows::Media::Capture;
 30      using namespace Windows::Media::Core;
 31      using namespace Windows::Media::Render;
 32      using namespace Windows::Media::MediaProperties;
 33      using namespace Windows::Media::Devices;
 34      using namespace Windows::Devices::Enumeration;
 35  }
 36  
 37  AudioSampleGenerator::AudioSampleGenerator(bool captureMicrophone, bool captureSystemAudio)
 38      : m_captureMicrophone(captureMicrophone)
 39      , m_captureSystemAudio(captureSystemAudio)
 40  {
 41      OutputDebugStringA(("AudioSampleGenerator created, captureMicrophone=" +
 42          std::string(captureMicrophone ? "true" : "false") +
 43          ", captureSystemAudio=" + std::string(captureSystemAudio ? "true" : "false") + "\n").c_str());
 44      m_audioEvent.create(wil::EventOptions::ManualReset);
 45      m_endEvent.create(wil::EventOptions::ManualReset);
 46      m_startEvent.create(wil::EventOptions::ManualReset);
 47      m_asyncInitialized.create(wil::EventOptions::ManualReset);
 48  }
 49  
AudioSampleGenerator::~AudioSampleGenerator()
{
    // Stop capture first (halts the graph and the loopback client, signals
    // end-of-stream to any waiting consumer), then release the AudioGraph if
    // initialization ever created one.
    Stop();
    if (m_audioGraph)
    {
        m_audioGraph.Close();
    }
}
 58  
winrt::IAsyncAction AudioSampleGenerator::InitializeAsync()
{
    // One-time asynchronous setup: creates the AudioGraph, the submix and frame
    // output nodes, the optional WASAPI loopback capture, and a microphone
    // device input node. Signals m_asyncInitialized on success so Stop() can
    // synchronize with a completed initialization. Idempotent via the CAS below.
    auto expected = false;
    if (m_initialized.compare_exchange_strong(expected, true))
    {
        // Reset state in case this instance is reused.
        m_endEvent.ResetEvent();
        m_startEvent.ResetEvent();

        // Initialize the audio graph
        auto audioGraphSettings = winrt::AudioGraphSettings(winrt::AudioRenderCategory::Media);
        auto audioGraphResult = co_await winrt::AudioGraph::CreateAsync(audioGraphSettings);
        if (audioGraphResult.Status() != winrt::AudioGraphCreationStatus::Success)
        {
            // NOTE(review): throwing here leaves m_initialized == true while
            // m_asyncInitialized is never signaled, so a later Stop() would wait
            // forever on m_asyncInitialized -- confirm callers handle an
            // initialization failure before calling Stop().
            throw winrt::hresult_error(E_FAIL, L"Failed to initialize AudioGraph!");
        }
        m_audioGraph = audioGraphResult.Graph();

        // Get AudioGraph encoding properties for resampling
        auto graphProps = m_audioGraph.EncodingProperties();
        m_graphSampleRate = graphProps.SampleRate();
        m_graphChannels = graphProps.ChannelCount();

        OutputDebugStringA(("AudioGraph initialized: " + std::to_string(m_graphSampleRate) +
            " Hz, " + std::to_string(m_graphChannels) + " ch\n").c_str());

        // Create submix node to mix microphone and loopback audio
        m_submixNode = m_audioGraph.CreateSubmixNode();
        m_audioOutputNode = m_audioGraph.CreateFrameOutputNode();
        m_submixNode.AddOutgoingConnection(m_audioOutputNode);

        // Initialize WASAPI loopback capture for system audio (if enabled)
        if (m_captureSystemAudio)
        {
            m_loopbackCapture = std::make_unique<LoopbackCapture>();
        }
        if (m_loopbackCapture && SUCCEEDED(m_loopbackCapture->Initialize()))
        {
            auto loopbackFormat = m_loopbackCapture->GetFormat();
            if (loopbackFormat)
            {
                // Cache the loopback device format and precompute the
                // input-rate / output-rate ratio used by
                // AppendResampledLoopbackSamples for linear resampling.
                m_loopbackChannels = loopbackFormat->nChannels;
                m_loopbackSampleRate = loopbackFormat->nSamplesPerSec;
                m_resampleRatio = static_cast<double>(m_loopbackSampleRate) / static_cast<double>(m_graphSampleRate);

                OutputDebugStringA(("Loopback initialized: " + std::to_string(m_loopbackSampleRate) +
                    " Hz, " + std::to_string(m_loopbackChannels) + " ch, resample ratio=" +
                    std::to_string(m_resampleRatio) + "\n").c_str());
            }
        }
        else if (m_captureSystemAudio)
        {
            // Loopback init failed: drop the capture object so the requirement
            // check further below can report the failure to the caller.
            OutputDebugStringA("WARNING: Failed to initialize loopback capture\n");
            m_loopbackCapture.reset();
        }

        // Always initialize a microphone input node to keep the AudioGraph running at real-time pace.
        // When mic capture is disabled, we mute it so only loopback audio is captured.
        {
            auto defaultMicrophoneId = winrt::MediaDevice::GetDefaultAudioCaptureId(winrt::AudioDeviceRole::Default);
            // Use the user-selected device only when mic capture is on and a
            // device id was configured; otherwise fall back to the default.
            auto microphoneId = (m_captureMicrophone && g_MicrophoneDeviceId[0] != 0)
                ? winrt::to_hstring(g_MicrophoneDeviceId)
                : defaultMicrophoneId;
            if (!microphoneId.empty())
            {
                auto microphone = co_await winrt::DeviceInformation::CreateFromIdAsync(microphoneId);

                // Initialize audio input node
                auto inputNodeResult = co_await m_audioGraph.CreateDeviceInputNodeAsync(winrt::MediaCategory::Media, m_audioGraph.EncodingProperties(), microphone);
                if (inputNodeResult.Status() != winrt::AudioDeviceNodeCreationStatus::Success && microphoneId != defaultMicrophoneId)
                {
                    // If the selected microphone failed, try again with the default
                    microphone = co_await winrt::DeviceInformation::CreateFromIdAsync(defaultMicrophoneId);
                    inputNodeResult = co_await m_audioGraph.CreateDeviceInputNodeAsync(winrt::MediaCategory::Media, m_audioGraph.EncodingProperties(), microphone);
                }
                if (inputNodeResult.Status() == winrt::AudioDeviceNodeCreationStatus::Success)
                {
                    m_audioInputNode = inputNodeResult.DeviceInputNode();
                    m_audioInputNode.AddOutgoingConnection(m_submixNode);

                    // If mic capture is disabled, mute the input so only loopback is captured
                    if (!m_captureMicrophone)
                    {
                        m_audioInputNode.OutgoingGain(0.0);
                        OutputDebugStringA("Mic input created but muted (loopback-only mode)\n");
                    }
                    else
                    {
                        OutputDebugStringA("Mic input created and active\n");
                    }
                }
            }
        }

        // Loopback capture is only required when system audio capture is enabled
        if (m_captureSystemAudio && !m_loopbackCapture)
        {
            throw winrt::hresult_error(E_FAIL, L"Failed to initialize loopback audio capture!");
        }

        // From here on, samples are produced by the per-quantum callback.
        m_audioGraph.QuantumStarted({ this, &AudioSampleGenerator::OnAudioQuantumStarted });

        m_asyncInitialized.SetEvent();
    }
}
164  
winrt::AudioEncodingProperties AudioSampleGenerator::GetEncodingProperties()
{
    // Returns the encoding properties of the frame output node, i.e. the format
    // of every sample produced by TryGetNextSample. Requires InitializeAsync()
    // to have run; CheckInitialized() enforces that.
    CheckInitialized();
    return m_audioOutputNode.EncodingProperties();
}
170  
171  std::optional<winrt::MediaStreamSample> AudioSampleGenerator::TryGetNextSample()
172  {
173      CheckInitialized();
174  
175      // The MediaStreamSource can request audio samples before we've started the audio graph.
176      // Instead of throwing (which crashes the app), wait until either Start() is called
177      // or Stop() signals end-of-stream.
178      if (!m_started.load())
179      {
180          std::vector<HANDLE> events = { m_endEvent.get(), m_startEvent.get() };
181          auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false);
182          auto eventIndex = -1;
183          switch (waitResult)
184          {
185          case WAIT_OBJECT_0:
186          case WAIT_OBJECT_0 + 1:
187              eventIndex = waitResult - WAIT_OBJECT_0;
188              break;
189          }
190          WINRT_VERIFY(eventIndex >= 0);
191  
192          if (events[eventIndex] == m_endEvent.get())
193          {
194              // End event signaled, but check if there are any remaining samples in the queue
195              auto lock = m_lock.lock_exclusive();
196              if (!m_samples.empty())
197              {
198                  std::optional result(m_samples.front());
199                  m_samples.pop_front();
200                  return result;
201              }
202              return std::nullopt;
203          }
204      }
205  
206      {
207          auto lock = m_lock.lock_exclusive();
208          if (m_samples.empty() && m_endEvent.is_signaled())
209          {
210              return std::nullopt;
211          }
212          else if (!m_samples.empty())
213          {
214              std::optional result(m_samples.front());
215              m_samples.pop_front();
216              return result;
217          }
218      }
219  
220      m_audioEvent.ResetEvent();
221      std::vector<HANDLE> events = { m_endEvent.get(), m_audioEvent.get() };
222      auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false);
223      auto eventIndex = -1;
224      switch (waitResult)
225      {
226      case WAIT_OBJECT_0:
227      case WAIT_OBJECT_0 + 1:
228          eventIndex = waitResult - WAIT_OBJECT_0;
229          break;
230      }
231      WINRT_VERIFY(eventIndex >= 0);
232  
233      auto signaledEvent = events[eventIndex];
234      if (signaledEvent == m_endEvent.get())
235      {
236          // End was signaled, but check for any remaining samples before returning nullopt
237          auto lock = m_lock.lock_exclusive();
238          if (!m_samples.empty())
239          {
240              std::optional result(m_samples.front());
241              m_samples.pop_front();
242              return result;
243          }
244          return std::nullopt;
245      }
246      else
247      {
248          auto lock = m_lock.lock_exclusive();
249          if (m_samples.empty())
250          {
251              // Spurious wake or race - no samples available
252              // If end is signaled, return nullopt
253              return m_endEvent.is_signaled() ? std::nullopt : std::optional<winrt::MediaStreamSample>{};
254          }
255          std::optional result(m_samples.front());
256          m_samples.pop_front();
257          return result;
258      }
259  }
260  
261  void AudioSampleGenerator::Start()
262  {
263      CheckInitialized();
264      auto expected = false;
265      if (m_started.compare_exchange_strong(expected, true))
266      {
267          m_endEvent.ResetEvent();
268          m_startEvent.SetEvent();
269  
270          // Start loopback capture if available
271          if (m_loopbackCapture)
272          {
273              // Clear any stale samples
274              {
275                  auto lock = m_loopbackBufferLock.lock_exclusive();
276                  m_loopbackBuffer.clear();
277              }
278  
279              m_resampleInputBuffer.clear();
280              m_resampleInputPos = 0.0;
281  
282              m_loopbackCapture->Start();
283          }
284  
285          m_audioGraph.Start();
286      }
287  }
288  
void AudioSampleGenerator::Stop()
{
    // Stop may be called during teardown even if initialization hasn't completed.
    // It must never throw.

    if (!m_initialized.load())
    {
        m_endEvent.SetEvent();
        return;
    }

    // Wait for InitializeAsync() to finish before touching the graph/nodes.
    // NOTE(review): if InitializeAsync() threw after setting m_initialized but
    // before signaling m_asyncInitialized, this wait never completes -- confirm
    // callers cannot reach Stop() on that path.
    m_asyncInitialized.wait();

    // Stop loopback capture first
    if (m_loopbackCapture)
    {
        m_loopbackCapture->Stop();
    }

    // Flush any remaining samples from the loopback capture before stopping the audio graph
    FlushRemainingAudio();

    // Stop the audio graph - no more quantum callbacks will run
    m_audioGraph.Stop();

    // Close the microphone input node to release the device so Windows no longer
    // reports the microphone as in use by ZoomIt.
    if (m_audioInputNode)
    {
        m_audioInputNode.Close();
        m_audioInputNode = nullptr;
    }

    // Mark as stopped
    m_started.store(false);

    // Combine all remaining queued samples into one final sample so it can be
    // returned immediately without waiting for additional TryGetNextSample calls
    CombineQueuedSamples();

    // NOW signal end event - this allows TryGetNextSample to return remaining
    // queued samples and then return nullopt
    m_endEvent.SetEvent();
    m_audioEvent.SetEvent(); // Also wake any waiting TryGetNextSample

    // DO NOT clear m_loopbackBuffer or m_samples here - allow MediaTranscoder to
    // consume remaining queued audio samples to avoid audio cutoff at end of recording.
    // TryGetNextSample() will return nullopt once m_samples is empty and
    // m_endEvent is signaled. Buffers will be cleaned up on destruction.
}
339  
340  void AudioSampleGenerator::AppendResampledLoopbackSamples(std::vector<float> const& rawLoopbackSamples, bool flushRemaining)
341  {
342      if (rawLoopbackSamples.empty())
343      {
344          return;
345      }
346  
347      m_resampleInputBuffer.insert(m_resampleInputBuffer.end(), rawLoopbackSamples.begin(), rawLoopbackSamples.end());
348  
349      if (m_loopbackChannels == 0 || m_graphChannels == 0 || m_resampleRatio <= 0.0)
350      {
351          return;
352      }
353  
354      std::vector<float> resampledSamples;
355      while (true)
356      {
357          const uint32_t inputFrames = static_cast<uint32_t>(m_resampleInputBuffer.size() / m_loopbackChannels);
358          if (inputFrames == 0)
359          {
360              break;
361          }
362  
363          if (!flushRemaining)
364          {
365              if (inputFrames < 2 || (m_resampleInputPos + 1.0) >= inputFrames)
366              {
367                  break;
368              }
369          }
370          else
371          {
372              if (m_resampleInputPos >= inputFrames)
373              {
374                  break;
375              }
376          }
377  
378          uint32_t inputFrame = static_cast<uint32_t>(m_resampleInputPos);
379          double frac = m_resampleInputPos - inputFrame;
380          uint32_t nextFrame = (inputFrame + 1 < inputFrames) ? (inputFrame + 1) : inputFrame;
381  
382          for (uint32_t outCh = 0; outCh < m_graphChannels; outCh++)
383          {
384              float sample = 0.0f;
385  
386              if (m_loopbackChannels == m_graphChannels)
387              {
388                  uint32_t idx1 = inputFrame * m_loopbackChannels + outCh;
389                  uint32_t idx2 = nextFrame * m_loopbackChannels + outCh;
390                  float s1 = m_resampleInputBuffer[idx1];
391                  float s2 = m_resampleInputBuffer[idx2];
392                  sample = static_cast<float>(s1 * (1.0 - frac) + s2 * frac);
393              }
394              else if (m_loopbackChannels > m_graphChannels)
395              {
396                  float sum = 0.0f;
397                  for (uint32_t inCh = 0; inCh < m_loopbackChannels; inCh++)
398                  {
399                      uint32_t idx1 = inputFrame * m_loopbackChannels + inCh;
400                      uint32_t idx2 = nextFrame * m_loopbackChannels + inCh;
401                      float s1 = m_resampleInputBuffer[idx1];
402                      float s2 = m_resampleInputBuffer[idx2];
403                      sum += static_cast<float>(s1 * (1.0 - frac) + s2 * frac);
404                  }
405                  sample = sum / m_loopbackChannels;
406              }
407              else
408              {
409                  uint32_t idx1 = inputFrame * m_loopbackChannels;
410                  uint32_t idx2 = nextFrame * m_loopbackChannels;
411                  float s1 = m_resampleInputBuffer[idx1];
412                  float s2 = m_resampleInputBuffer[idx2];
413                  sample = static_cast<float>(s1 * (1.0 - frac) + s2 * frac);
414              }
415  
416              resampledSamples.push_back(sample);
417          }
418  
419          m_resampleInputPos += m_resampleRatio;
420      }
421  
422      uint32_t consumedFrames = static_cast<uint32_t>(m_resampleInputPos);
423      if (consumedFrames > 0)
424      {
425          size_t samplesToErase = static_cast<size_t>(consumedFrames) * m_loopbackChannels;
426          if (samplesToErase >= m_resampleInputBuffer.size())
427          {
428              m_resampleInputBuffer.clear();
429              m_resampleInputPos = 0.0;
430          }
431          else
432          {
433              m_resampleInputBuffer.erase(m_resampleInputBuffer.begin(), m_resampleInputBuffer.begin() + samplesToErase);
434              m_resampleInputPos -= consumedFrames;
435          }
436      }
437  
438      if (flushRemaining)
439      {
440          m_resampleInputBuffer.clear();
441          m_resampleInputPos = 0.0;
442      }
443  
444      if (!resampledSamples.empty())
445      {
446          auto loopbackLock = m_loopbackBufferLock.lock_exclusive();
447          const size_t maxBufferSize = static_cast<size_t>(m_graphSampleRate) * m_graphChannels;
448  
449          if (m_loopbackBuffer.size() + resampledSamples.size() > maxBufferSize)
450          {
451              size_t overflow = (m_loopbackBuffer.size() + resampledSamples.size()) - maxBufferSize;
452              if (overflow >= m_loopbackBuffer.size())
453              {
454                  m_loopbackBuffer.clear();
455              }
456              else
457              {
458                  m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + overflow);
459              }
460          }
461  
462          m_loopbackBuffer.insert(m_loopbackBuffer.end(), resampledSamples.begin(), resampledSamples.end());
463      }
464  }
465  
466  void AudioSampleGenerator::FlushRemainingAudio()
467  {
468      // Called during stop to drain any remaining samples from loopback capture
469      // and convert them to MediaStreamSamples before the audio graph stops.
470  
471      if (!m_loopbackCapture)
472      {
473          return;
474      }
475  
476      auto lock = m_lock.lock_exclusive();
477  
478      // Drain all remaining samples from the loopback capture client
479      std::vector<float> rawLoopbackSamples;
480      {
481          std::vector<float> tempSamples;
482          while (m_loopbackCapture->TryGetSamples(tempSamples))
483          {
484              rawLoopbackSamples.insert(rawLoopbackSamples.end(), tempSamples.begin(), tempSamples.end());
485          }
486      }
487  
488      // Resample and channel-convert the loopback audio to match AudioGraph format
489      if (!rawLoopbackSamples.empty())
490      {
491          AppendResampledLoopbackSamples(rawLoopbackSamples, true);
492      }
493  
494      // Now convert everything in m_loopbackBuffer to MediaStreamSamples
495      auto loopbackLock = m_loopbackBufferLock.lock_exclusive();
496  
497      if (!m_loopbackBuffer.empty())
498      {
499          uint32_t outputSampleCount = static_cast<uint32_t>(m_loopbackBuffer.size());
500          std::vector<uint8_t> outputData(outputSampleCount * sizeof(float), 0);
501          float* outputFloats = reinterpret_cast<float*>(outputData.data());
502  
503          for (uint32_t i = 0; i < outputSampleCount; i++)
504          {
505              float sample = m_loopbackBuffer[i];
506              if (sample > 1.0f) sample = 1.0f;
507              else if (sample < -1.0f) sample = -1.0f;
508              outputFloats[i] = sample;
509          }
510  
511          m_loopbackBuffer.clear();
512  
513          // Create buffer and sample
514          winrt::Buffer sampleBuffer(outputSampleCount * sizeof(float));
515          memcpy(sampleBuffer.data(), outputData.data(), outputData.size());
516          sampleBuffer.Length(static_cast<uint32_t>(outputData.size()));
517  
518          if (sampleBuffer.Length() > 0)
519          {
520              const uint32_t sampleCount = sampleBuffer.Length() / sizeof(float);
521              const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0;
522              const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0;
523              const winrt::TimeSpan duration{ durationTicks };
524  
525              winrt::TimeSpan timestamp{ 0 };
526              if (m_hasLastSampleTimestamp)
527              {
528                  timestamp = winrt::TimeSpan{ m_lastSampleTimestamp.count() + m_lastSampleDuration.count() };
529              }
530  
531              auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, timestamp);
532              m_samples.push_back(sample);
533              m_audioEvent.SetEvent();
534  
535              m_lastSampleTimestamp = timestamp;
536              m_lastSampleDuration = duration;
537              m_hasLastSampleTimestamp = true;
538          }
539      }
540  }
541  
542  void AudioSampleGenerator::CombineQueuedSamples()
543  {
544      // Combine all queued samples into a single sample so it can be returned
545      // immediately in the next TryGetNextSample call. This is critical because
546      // once video ends, the MediaTranscoder may only request one more audio sample.
547  
548      auto lock = m_lock.lock_exclusive();
549  
550      if (m_samples.size() <= 1)
551      {
552          return;
553      }
554  
555      // Calculate total size and collect all sample data
556      size_t totalBytes = 0;
557      std::vector<std::pair<winrt::Windows::Storage::Streams::IBuffer, winrt::Windows::Foundation::TimeSpan>> buffers;
558      winrt::Windows::Foundation::TimeSpan firstTimestamp{ 0 };
559      bool hasFirstTimestamp = false;
560  
561      for (auto& sample : m_samples)
562      {
563          auto buffer = sample.Buffer();
564          if (buffer)
565          {
566              totalBytes += buffer.Length();
567              if (!hasFirstTimestamp)
568              {
569                  firstTimestamp = sample.Timestamp();
570                  hasFirstTimestamp = true;
571              }
572              buffers.push_back({ buffer, sample.Timestamp() });
573          }
574      }
575  
576      if (totalBytes == 0)
577      {
578          return;
579      }
580  
581      // Create combined buffer
582      winrt::Buffer combinedBuffer(static_cast<uint32_t>(totalBytes));
583      uint8_t* dest = combinedBuffer.data();
584      uint32_t offset = 0;
585  
586      for (auto& [buffer, ts] : buffers)
587      {
588          uint32_t len = buffer.Length();
589          memcpy(dest + offset, buffer.data(), len);
590          offset += len;
591      }
592      combinedBuffer.Length(static_cast<uint32_t>(totalBytes));
593  
594      // Create combined sample with first timestamp
595      auto combinedSample = winrt::Windows::Media::Core::MediaStreamSample::CreateFromBuffer(combinedBuffer, firstTimestamp);
596  
597      // Clear queue and add combined sample
598      m_samples.clear();
599      m_samples.push_back(combinedSample);
600  
601      // Update timestamp tracking
602      const uint32_t sampleCount = static_cast<uint32_t>(totalBytes) / sizeof(float);
603      const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0;
604      const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0;
605      m_lastSampleTimestamp = firstTimestamp;
606      m_lastSampleDuration = winrt::Windows::Foundation::TimeSpan{ durationTicks };
607      m_hasLastSampleTimestamp = true;
608  }
609  
610  void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender, winrt::IInspectable const& args)
611  {
612      // Don't process if we're not actively recording
613      if (!m_started.load())
614      {
615          return;
616      }
617  
618      {
619          auto lock = m_lock.lock_exclusive();
620  
621          auto frame = m_audioOutputNode.GetFrame();
622          std::optional<winrt::TimeSpan> timestamp = frame.RelativeTime();
623          auto audioBuffer = frame.LockBuffer(winrt::AudioBufferAccessMode::Read);
624  
625          // Get mic audio as a buffer (may be empty if no microphone)
626          auto sampleBuffer = winrt::Buffer::CreateCopyFromMemoryBuffer(audioBuffer);
627          sampleBuffer.Length(audioBuffer.Length());
628  
629          // Calculate expected samples per quantum (~10ms at graph sample rate)
630          // AudioGraph uses 10ms quantums by default
631          uint32_t expectedSamplesPerQuantum = (m_graphSampleRate / 100) * m_graphChannels;
632          uint32_t numMicSamples = audioBuffer.Length() / sizeof(float);
633  
634          // Drain loopback samples regardless of whether we have mic audio
635          if (m_loopbackCapture)
636          {
637              std::vector<float> rawLoopbackSamples;
638              {
639                  std::vector<float> tempSamples;
640                  while (m_loopbackCapture->TryGetSamples(tempSamples))
641                  {
642                      rawLoopbackSamples.insert(rawLoopbackSamples.end(), tempSamples.begin(), tempSamples.end());
643                  }
644              }
645  
646              // Resample and channel-convert the loopback audio to match AudioGraph format
647              if (!rawLoopbackSamples.empty())
648              {
649                  AppendResampledLoopbackSamples(rawLoopbackSamples);
650              }
651          }
652  
653          // Determine the actual number of samples we'll output
654          // Use mic sample count if mic is enabled
655          uint32_t outputSampleCount = m_captureMicrophone ? numMicSamples : expectedSamplesPerQuantum;
656  
657          // If microphone is disabled, create a buffer with only loopback audio
658          if (!m_captureMicrophone && outputSampleCount > 0)
659          {
660              // Create a buffer filled with loopback audio or silence
661              std::vector<uint8_t> outputData(outputSampleCount * sizeof(float), 0);
662              float* outputFloats = reinterpret_cast<float*>(outputData.data());
663  
664              {
665                  auto loopbackLock = m_loopbackBufferLock.lock_exclusive();
666                  uint32_t samplesToUse = min(outputSampleCount, static_cast<uint32_t>(m_loopbackBuffer.size()));
667  
668                  for (uint32_t i = 0; i < samplesToUse; i++)
669                  {
670                      float sample = m_loopbackBuffer[i];
671                      if (sample > 1.0f) sample = 1.0f;
672                      else if (sample < -1.0f) sample = -1.0f;
673                      outputFloats[i] = sample;
674                  }
675  
676                  if (samplesToUse > 0)
677                  {
678                      m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + samplesToUse);
679                  }
680              }
681  
682              // Create a new buffer with our loopback data
683              sampleBuffer = winrt::Buffer(outputSampleCount * sizeof(float));
684              memcpy(sampleBuffer.data(), outputData.data(), outputData.size());
685              sampleBuffer.Length(static_cast<uint32_t>(outputData.size()));
686          }
687          else if (m_captureMicrophone && numMicSamples > 0)
688          {
689              // Mix loopback into mic samples
690              auto loopbackLock = m_loopbackBufferLock.lock_exclusive();
691              float* bufferData = reinterpret_cast<float*>(sampleBuffer.data());
692              uint32_t samplesToMix = min(numMicSamples, static_cast<uint32_t>(m_loopbackBuffer.size()));
693  
694              for (uint32_t i = 0; i < samplesToMix; i++)
695              {
696                  float mixed = bufferData[i] + m_loopbackBuffer[i];
697                  if (mixed > 1.0f) mixed = 1.0f;
698                  else if (mixed < -1.0f) mixed = -1.0f;
699                  bufferData[i] = mixed;
700              }
701  
702              if (samplesToMix > 0)
703              {
704                  m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + samplesToMix);
705              }
706          }
707  
708          if (sampleBuffer.Length() > 0)
709          {
710              auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, timestamp.value());
711              m_samples.push_back(sample);
712  
713              const uint32_t sampleCount = sampleBuffer.Length() / sizeof(float);
714              const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0;
715              const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0;
716              m_lastSampleTimestamp = timestamp.value();
717              m_lastSampleDuration = winrt::TimeSpan{ durationTicks };
718              m_hasLastSampleTimestamp = true;
719          }
720      }
721      m_audioEvent.SetEvent();
722  }