AudioSampleGenerator.cpp
1 #include "pch.h" 2 #include "AudioSampleGenerator.h" 3 #include "CaptureFrameWait.h" 4 #include "LoopbackCapture.h" 5 #include <wrl/client.h> 6 7 extern TCHAR g_MicrophoneDeviceId[]; 8 9 namespace 10 { 11 // Declare the IMemoryBufferByteAccess interface for accessing raw buffer data 12 MIDL_INTERFACE("5b0d3235-4dba-4d44-8657-1f1d0f83e9a3") 13 IMemoryBufferByteAccess : public IUnknown 14 { 15 public: 16 virtual HRESULT STDMETHODCALLTYPE GetBuffer( 17 BYTE** value, 18 UINT32* capacity) = 0; 19 }; 20 } 21 22 namespace winrt 23 { 24 using namespace Windows::Foundation; 25 using namespace Windows::Storage; 26 using namespace Windows::Storage::Streams; 27 using namespace Windows::Media; 28 using namespace Windows::Media::Audio; 29 using namespace Windows::Media::Capture; 30 using namespace Windows::Media::Core; 31 using namespace Windows::Media::Render; 32 using namespace Windows::Media::MediaProperties; 33 using namespace Windows::Media::Devices; 34 using namespace Windows::Devices::Enumeration; 35 } 36 37 AudioSampleGenerator::AudioSampleGenerator(bool captureMicrophone, bool captureSystemAudio) 38 : m_captureMicrophone(captureMicrophone) 39 , m_captureSystemAudio(captureSystemAudio) 40 { 41 OutputDebugStringA(("AudioSampleGenerator created, captureMicrophone=" + 42 std::string(captureMicrophone ? "true" : "false") + 43 ", captureSystemAudio=" + std::string(captureSystemAudio ? "true" : "false") + "\n").c_str()); 44 m_audioEvent.create(wil::EventOptions::ManualReset); 45 m_endEvent.create(wil::EventOptions::ManualReset); 46 m_startEvent.create(wil::EventOptions::ManualReset); 47 m_asyncInitialized.create(wil::EventOptions::ManualReset); 48 } 49 50 AudioSampleGenerator::~AudioSampleGenerator() 51 { 52 Stop(); 53 if (m_audioGraph) 54 { 55 m_audioGraph.Close(); 56 } 57 } 58 59 winrt::IAsyncAction AudioSampleGenerator::InitializeAsync() 60 { 61 auto expected = false; 62 if (m_initialized.compare_exchange_strong(expected, true)) 63 { 64 // Reset state in case this instance is reused. 65 m_endEvent.ResetEvent(); 66 m_startEvent.ResetEvent(); 67 68 // Initialize the audio graph 69 auto audioGraphSettings = winrt::AudioGraphSettings(winrt::AudioRenderCategory::Media); 70 auto audioGraphResult = co_await winrt::AudioGraph::CreateAsync(audioGraphSettings); 71 if (audioGraphResult.Status() != winrt::AudioGraphCreationStatus::Success) 72 { 73 throw winrt::hresult_error(E_FAIL, L"Failed to initialize AudioGraph!"); 74 } 75 m_audioGraph = audioGraphResult.Graph(); 76 77 // Get AudioGraph encoding properties for resampling 78 auto graphProps = m_audioGraph.EncodingProperties(); 79 m_graphSampleRate = graphProps.SampleRate(); 80 m_graphChannels = graphProps.ChannelCount(); 81 82 OutputDebugStringA(("AudioGraph initialized: " + std::to_string(m_graphSampleRate) + 83 " Hz, " + std::to_string(m_graphChannels) + " ch\n").c_str()); 84 85 // Create submix node to mix microphone and loopback audio 86 m_submixNode = m_audioGraph.CreateSubmixNode(); 87 m_audioOutputNode = m_audioGraph.CreateFrameOutputNode(); 88 m_submixNode.AddOutgoingConnection(m_audioOutputNode); 89 90 // Initialize WASAPI loopback capture for system audio (if enabled) 91 if (m_captureSystemAudio) 92 { 93 m_loopbackCapture = std::make_unique<LoopbackCapture>(); 94 } 95 if (m_loopbackCapture && SUCCEEDED(m_loopbackCapture->Initialize())) 96 { 97 auto loopbackFormat = m_loopbackCapture->GetFormat(); 98 if (loopbackFormat) 99 { 100 m_loopbackChannels = loopbackFormat->nChannels; 101 m_loopbackSampleRate = loopbackFormat->nSamplesPerSec; 102 m_resampleRatio = static_cast<double>(m_loopbackSampleRate) / static_cast<double>(m_graphSampleRate); 103 104 OutputDebugStringA(("Loopback initialized: " + std::to_string(m_loopbackSampleRate) + 105 " Hz, " + std::to_string(m_loopbackChannels) + " ch, resample ratio=" + 106 std::to_string(m_resampleRatio) + "\n").c_str()); 107 } 108 } 109 else if (m_captureSystemAudio) 110 { 111 OutputDebugStringA("WARNING: Failed to initialize loopback capture\n"); 112 m_loopbackCapture.reset(); 113 } 114 115 // Always initialize a microphone input node to keep the AudioGraph running at real-time pace. 116 // When mic capture is disabled, we mute it so only loopback audio is captured. 117 { 118 auto defaultMicrophoneId = winrt::MediaDevice::GetDefaultAudioCaptureId(winrt::AudioDeviceRole::Default); 119 auto microphoneId = (m_captureMicrophone && g_MicrophoneDeviceId[0] != 0) 120 ? winrt::to_hstring(g_MicrophoneDeviceId) 121 : defaultMicrophoneId; 122 if (!microphoneId.empty()) 123 { 124 auto microphone = co_await winrt::DeviceInformation::CreateFromIdAsync(microphoneId); 125 126 // Initialize audio input node 127 auto inputNodeResult = co_await m_audioGraph.CreateDeviceInputNodeAsync(winrt::MediaCategory::Media, m_audioGraph.EncodingProperties(), microphone); 128 if (inputNodeResult.Status() != winrt::AudioDeviceNodeCreationStatus::Success && microphoneId != defaultMicrophoneId) 129 { 130 // If the selected microphone failed, try again with the default 131 microphone = co_await winrt::DeviceInformation::CreateFromIdAsync(defaultMicrophoneId); 132 inputNodeResult = co_await m_audioGraph.CreateDeviceInputNodeAsync(winrt::MediaCategory::Media, m_audioGraph.EncodingProperties(), microphone); 133 } 134 if (inputNodeResult.Status() == winrt::AudioDeviceNodeCreationStatus::Success) 135 { 136 m_audioInputNode = inputNodeResult.DeviceInputNode(); 137 m_audioInputNode.AddOutgoingConnection(m_submixNode); 138 139 // If mic capture is disabled, mute the input so only loopback is captured 140 if (!m_captureMicrophone) 141 { 142 m_audioInputNode.OutgoingGain(0.0); 143 OutputDebugStringA("Mic input created but muted (loopback-only mode)\n"); 144 } 145 else 146 { 147 OutputDebugStringA("Mic input created and active\n"); 148 } 149 } 150 } 151 } 152 153 // Loopback capture is only required when system audio capture is enabled 154 if (m_captureSystemAudio && !m_loopbackCapture) 155 { 156 throw winrt::hresult_error(E_FAIL, L"Failed to initialize loopback audio capture!"); 157 } 158 159 m_audioGraph.QuantumStarted({ this, &AudioSampleGenerator::OnAudioQuantumStarted }); 160 161 m_asyncInitialized.SetEvent(); 162 } 163 } 164 165 winrt::AudioEncodingProperties AudioSampleGenerator::GetEncodingProperties() 166 { 167 CheckInitialized(); 168 return m_audioOutputNode.EncodingProperties(); 169 } 170 171 std::optional<winrt::MediaStreamSample> AudioSampleGenerator::TryGetNextSample() 172 { 173 CheckInitialized(); 174 175 // The MediaStreamSource can request audio samples before we've started the audio graph. 176 // Instead of throwing (which crashes the app), wait until either Start() is called 177 // or Stop() signals end-of-stream. 178 if (!m_started.load()) 179 { 180 std::vector<HANDLE> events = { m_endEvent.get(), m_startEvent.get() }; 181 auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false); 182 auto eventIndex = -1; 183 switch (waitResult) 184 { 185 case WAIT_OBJECT_0: 186 case WAIT_OBJECT_0 + 1: 187 eventIndex = waitResult - WAIT_OBJECT_0; 188 break; 189 } 190 WINRT_VERIFY(eventIndex >= 0); 191 192 if (events[eventIndex] == m_endEvent.get()) 193 { 194 // End event signaled, but check if there are any remaining samples in the queue 195 auto lock = m_lock.lock_exclusive(); 196 if (!m_samples.empty()) 197 { 198 std::optional result(m_samples.front()); 199 m_samples.pop_front(); 200 return result; 201 } 202 return std::nullopt; 203 } 204 } 205 206 { 207 auto lock = m_lock.lock_exclusive(); 208 if (m_samples.empty() && m_endEvent.is_signaled()) 209 { 210 return std::nullopt; 211 } 212 else if (!m_samples.empty()) 213 { 214 std::optional result(m_samples.front()); 215 m_samples.pop_front(); 216 return result; 217 } 218 } 219 220 m_audioEvent.ResetEvent(); 221 std::vector<HANDLE> events = { m_endEvent.get(), m_audioEvent.get() }; 222 auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false); 223 auto eventIndex = -1; 224 switch (waitResult) 225 { 226 case WAIT_OBJECT_0: 227 case WAIT_OBJECT_0 + 1: 228 eventIndex = waitResult - WAIT_OBJECT_0; 229 break; 230 } 231 WINRT_VERIFY(eventIndex >= 0); 232 233 auto signaledEvent = events[eventIndex]; 234 if (signaledEvent == m_endEvent.get()) 235 { 236 // End was signaled, but check for any remaining samples before returning nullopt 237 auto lock = m_lock.lock_exclusive(); 238 if (!m_samples.empty()) 239 { 240 std::optional result(m_samples.front()); 241 m_samples.pop_front(); 242 return result; 243 } 244 return std::nullopt; 245 } 246 else 247 { 248 auto lock = m_lock.lock_exclusive(); 249 if (m_samples.empty()) 250 { 251 // Spurious wake or race - no samples available 252 // If end is signaled, return nullopt 253 return m_endEvent.is_signaled() ? std::nullopt : std::optional<winrt::MediaStreamSample>{}; 254 } 255 std::optional result(m_samples.front()); 256 m_samples.pop_front(); 257 return result; 258 } 259 } 260 261 void AudioSampleGenerator::Start() 262 { 263 CheckInitialized(); 264 auto expected = false; 265 if (m_started.compare_exchange_strong(expected, true)) 266 { 267 m_endEvent.ResetEvent(); 268 m_startEvent.SetEvent(); 269 270 // Start loopback capture if available 271 if (m_loopbackCapture) 272 { 273 // Clear any stale samples 274 { 275 auto lock = m_loopbackBufferLock.lock_exclusive(); 276 m_loopbackBuffer.clear(); 277 } 278 279 m_resampleInputBuffer.clear(); 280 m_resampleInputPos = 0.0; 281 282 m_loopbackCapture->Start(); 283 } 284 285 m_audioGraph.Start(); 286 } 287 } 288 289 void AudioSampleGenerator::Stop() 290 { 291 // Stop may be called during teardown even if initialization hasn't completed. 292 // It must never throw. 293 294 if (!m_initialized.load()) 295 { 296 m_endEvent.SetEvent(); 297 return; 298 } 299 300 m_asyncInitialized.wait(); 301 302 // Stop loopback capture first 303 if (m_loopbackCapture) 304 { 305 m_loopbackCapture->Stop(); 306 } 307 308 // Flush any remaining samples from the loopback capture before stopping the audio graph 309 FlushRemainingAudio(); 310 311 // Stop the audio graph - no more quantum callbacks will run 312 m_audioGraph.Stop(); 313 314 // Close the microphone input node to release the device so Windows no longer 315 // reports the microphone as in use by ZoomIt. 316 if (m_audioInputNode) 317 { 318 m_audioInputNode.Close(); 319 m_audioInputNode = nullptr; 320 } 321 322 // Mark as stopped 323 m_started.store(false); 324 325 // Combine all remaining queued samples into one final sample so it can be 326 // returned immediately without waiting for additional TryGetNextSample calls 327 CombineQueuedSamples(); 328 329 // NOW signal end event - this allows TryGetNextSample to return remaining 330 // queued samples and then return nullopt 331 m_endEvent.SetEvent(); 332 m_audioEvent.SetEvent(); // Also wake any waiting TryGetNextSample 333 334 // DO NOT clear m_loopbackBuffer or m_samples here - allow MediaTranscoder to 335 // consume remaining queued audio samples to avoid audio cutoff at end of recording. 336 // TryGetNextSample() will return nullopt once m_samples is empty and 337 // m_endEvent is signaled. Buffers will be cleaned up on destruction. 338 } 339 340 void AudioSampleGenerator::AppendResampledLoopbackSamples(std::vector<float> const& rawLoopbackSamples, bool flushRemaining) 341 { 342 if (rawLoopbackSamples.empty()) 343 { 344 return; 345 } 346 347 m_resampleInputBuffer.insert(m_resampleInputBuffer.end(), rawLoopbackSamples.begin(), rawLoopbackSamples.end()); 348 349 if (m_loopbackChannels == 0 || m_graphChannels == 0 || m_resampleRatio <= 0.0) 350 { 351 return; 352 } 353 354 std::vector<float> resampledSamples; 355 while (true) 356 { 357 const uint32_t inputFrames = static_cast<uint32_t>(m_resampleInputBuffer.size() / m_loopbackChannels); 358 if (inputFrames == 0) 359 { 360 break; 361 } 362 363 if (!flushRemaining) 364 { 365 if (inputFrames < 2 || (m_resampleInputPos + 1.0) >= inputFrames) 366 { 367 break; 368 } 369 } 370 else 371 { 372 if (m_resampleInputPos >= inputFrames) 373 { 374 break; 375 } 376 } 377 378 uint32_t inputFrame = static_cast<uint32_t>(m_resampleInputPos); 379 double frac = m_resampleInputPos - inputFrame; 380 uint32_t nextFrame = (inputFrame + 1 < inputFrames) ? (inputFrame + 1) : inputFrame; 381 382 for (uint32_t outCh = 0; outCh < m_graphChannels; outCh++) 383 { 384 float sample = 0.0f; 385 386 if (m_loopbackChannels == m_graphChannels) 387 { 388 uint32_t idx1 = inputFrame * m_loopbackChannels + outCh; 389 uint32_t idx2 = nextFrame * m_loopbackChannels + outCh; 390 float s1 = m_resampleInputBuffer[idx1]; 391 float s2 = m_resampleInputBuffer[idx2]; 392 sample = static_cast<float>(s1 * (1.0 - frac) + s2 * frac); 393 } 394 else if (m_loopbackChannels > m_graphChannels) 395 { 396 float sum = 0.0f; 397 for (uint32_t inCh = 0; inCh < m_loopbackChannels; inCh++) 398 { 399 uint32_t idx1 = inputFrame * m_loopbackChannels + inCh; 400 uint32_t idx2 = nextFrame * m_loopbackChannels + inCh; 401 float s1 = m_resampleInputBuffer[idx1]; 402 float s2 = m_resampleInputBuffer[idx2]; 403 sum += static_cast<float>(s1 * (1.0 - frac) + s2 * frac); 404 } 405 sample = sum / m_loopbackChannels; 406 } 407 else 408 { 409 uint32_t idx1 = inputFrame * m_loopbackChannels; 410 uint32_t idx2 = nextFrame * m_loopbackChannels; 411 float s1 = m_resampleInputBuffer[idx1]; 412 float s2 = m_resampleInputBuffer[idx2]; 413 sample = static_cast<float>(s1 * (1.0 - frac) + s2 * frac); 414 } 415 416 resampledSamples.push_back(sample); 417 } 418 419 m_resampleInputPos += m_resampleRatio; 420 } 421 422 uint32_t consumedFrames = static_cast<uint32_t>(m_resampleInputPos); 423 if (consumedFrames > 0) 424 { 425 size_t samplesToErase = static_cast<size_t>(consumedFrames) * m_loopbackChannels; 426 if (samplesToErase >= m_resampleInputBuffer.size()) 427 { 428 m_resampleInputBuffer.clear(); 429 m_resampleInputPos = 0.0; 430 } 431 else 432 { 433 m_resampleInputBuffer.erase(m_resampleInputBuffer.begin(), m_resampleInputBuffer.begin() + samplesToErase); 434 m_resampleInputPos -= consumedFrames; 435 } 436 } 437 438 if (flushRemaining) 439 { 440 m_resampleInputBuffer.clear(); 441 m_resampleInputPos = 0.0; 442 } 443 444 if (!resampledSamples.empty()) 445 { 446 auto loopbackLock = m_loopbackBufferLock.lock_exclusive(); 447 const size_t maxBufferSize = static_cast<size_t>(m_graphSampleRate) * m_graphChannels; 448 449 if (m_loopbackBuffer.size() + resampledSamples.size() > maxBufferSize) 450 { 451 size_t overflow = (m_loopbackBuffer.size() + resampledSamples.size()) - maxBufferSize; 452 if (overflow >= m_loopbackBuffer.size()) 453 { 454 m_loopbackBuffer.clear(); 455 } 456 else 457 { 458 m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + overflow); 459 } 460 } 461 462 m_loopbackBuffer.insert(m_loopbackBuffer.end(), resampledSamples.begin(), resampledSamples.end()); 463 } 464 } 465 466 void AudioSampleGenerator::FlushRemainingAudio() 467 { 468 // Called during stop to drain any remaining samples from loopback capture 469 // and convert them to MediaStreamSamples before the audio graph stops. 470 471 if (!m_loopbackCapture) 472 { 473 return; 474 } 475 476 auto lock = m_lock.lock_exclusive(); 477 478 // Drain all remaining samples from the loopback capture client 479 std::vector<float> rawLoopbackSamples; 480 { 481 std::vector<float> tempSamples; 482 while (m_loopbackCapture->TryGetSamples(tempSamples)) 483 { 484 rawLoopbackSamples.insert(rawLoopbackSamples.end(), tempSamples.begin(), tempSamples.end()); 485 } 486 } 487 488 // Resample and channel-convert the loopback audio to match AudioGraph format 489 if (!rawLoopbackSamples.empty()) 490 { 491 AppendResampledLoopbackSamples(rawLoopbackSamples, true); 492 } 493 494 // Now convert everything in m_loopbackBuffer to MediaStreamSamples 495 auto loopbackLock = m_loopbackBufferLock.lock_exclusive(); 496 497 if (!m_loopbackBuffer.empty()) 498 { 499 uint32_t outputSampleCount = static_cast<uint32_t>(m_loopbackBuffer.size()); 500 std::vector<uint8_t> outputData(outputSampleCount * sizeof(float), 0); 501 float* outputFloats = reinterpret_cast<float*>(outputData.data()); 502 503 for (uint32_t i = 0; i < outputSampleCount; i++) 504 { 505 float sample = m_loopbackBuffer[i]; 506 if (sample > 1.0f) sample = 1.0f; 507 else if (sample < -1.0f) sample = -1.0f; 508 outputFloats[i] = sample; 509 } 510 511 m_loopbackBuffer.clear(); 512 513 // Create buffer and sample 514 winrt::Buffer sampleBuffer(outputSampleCount * sizeof(float)); 515 memcpy(sampleBuffer.data(), outputData.data(), outputData.size()); 516 sampleBuffer.Length(static_cast<uint32_t>(outputData.size())); 517 518 if (sampleBuffer.Length() > 0) 519 { 520 const uint32_t sampleCount = sampleBuffer.Length() / sizeof(float); 521 const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0; 522 const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0; 523 const winrt::TimeSpan duration{ durationTicks }; 524 525 winrt::TimeSpan timestamp{ 0 }; 526 if (m_hasLastSampleTimestamp) 527 { 528 timestamp = winrt::TimeSpan{ m_lastSampleTimestamp.count() + m_lastSampleDuration.count() }; 529 } 530 531 auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, timestamp); 532 m_samples.push_back(sample); 533 m_audioEvent.SetEvent(); 534 535 m_lastSampleTimestamp = timestamp; 536 m_lastSampleDuration = duration; 537 m_hasLastSampleTimestamp = true; 538 } 539 } 540 } 541 542 void AudioSampleGenerator::CombineQueuedSamples() 543 { 544 // Combine all queued samples into a single sample so it can be returned 545 // immediately in the next TryGetNextSample call. This is critical because 546 // once video ends, the MediaTranscoder may only request one more audio sample. 547 548 auto lock = m_lock.lock_exclusive(); 549 550 if (m_samples.size() <= 1) 551 { 552 return; 553 } 554 555 // Calculate total size and collect all sample data 556 size_t totalBytes = 0; 557 std::vector<std::pair<winrt::Windows::Storage::Streams::IBuffer, winrt::Windows::Foundation::TimeSpan>> buffers; 558 winrt::Windows::Foundation::TimeSpan firstTimestamp{ 0 }; 559 bool hasFirstTimestamp = false; 560 561 for (auto& sample : m_samples) 562 { 563 auto buffer = sample.Buffer(); 564 if (buffer) 565 { 566 totalBytes += buffer.Length(); 567 if (!hasFirstTimestamp) 568 { 569 firstTimestamp = sample.Timestamp(); 570 hasFirstTimestamp = true; 571 } 572 buffers.push_back({ buffer, sample.Timestamp() }); 573 } 574 } 575 576 if (totalBytes == 0) 577 { 578 return; 579 } 580 581 // Create combined buffer 582 winrt::Buffer combinedBuffer(static_cast<uint32_t>(totalBytes)); 583 uint8_t* dest = combinedBuffer.data(); 584 uint32_t offset = 0; 585 586 for (auto& [buffer, ts] : buffers) 587 { 588 uint32_t len = buffer.Length(); 589 memcpy(dest + offset, buffer.data(), len); 590 offset += len; 591 } 592 combinedBuffer.Length(static_cast<uint32_t>(totalBytes)); 593 594 // Create combined sample with first timestamp 595 auto combinedSample = winrt::Windows::Media::Core::MediaStreamSample::CreateFromBuffer(combinedBuffer, firstTimestamp); 596 597 // Clear queue and add combined sample 598 m_samples.clear(); 599 m_samples.push_back(combinedSample); 600 601 // Update timestamp tracking 602 const uint32_t sampleCount = static_cast<uint32_t>(totalBytes) / sizeof(float); 603 const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0; 604 const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0; 605 m_lastSampleTimestamp = firstTimestamp; 606 m_lastSampleDuration = winrt::Windows::Foundation::TimeSpan{ durationTicks }; 607 m_hasLastSampleTimestamp = true; 608 } 609 610 void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender, winrt::IInspectable const& args) 611 { 612 // Don't process if we're not actively recording 613 if (!m_started.load()) 614 { 615 return; 616 } 617 618 { 619 auto lock = m_lock.lock_exclusive(); 620 621 auto frame = m_audioOutputNode.GetFrame(); 622 std::optional<winrt::TimeSpan> timestamp = frame.RelativeTime(); 623 auto audioBuffer = frame.LockBuffer(winrt::AudioBufferAccessMode::Read); 624 625 // Get mic audio as a buffer (may be empty if no microphone) 626 auto sampleBuffer = winrt::Buffer::CreateCopyFromMemoryBuffer(audioBuffer); 627 sampleBuffer.Length(audioBuffer.Length()); 628 629 // Calculate expected samples per quantum (~10ms at graph sample rate) 630 // AudioGraph uses 10ms quantums by default 631 uint32_t expectedSamplesPerQuantum = (m_graphSampleRate / 100) * m_graphChannels; 632 uint32_t numMicSamples = audioBuffer.Length() / sizeof(float); 633 634 // Drain loopback samples regardless of whether we have mic audio 635 if (m_loopbackCapture) 636 { 637 std::vector<float> rawLoopbackSamples; 638 { 639 std::vector<float> tempSamples; 640 while (m_loopbackCapture->TryGetSamples(tempSamples)) 641 { 642 rawLoopbackSamples.insert(rawLoopbackSamples.end(), tempSamples.begin(), tempSamples.end()); 643 } 644 } 645 646 // Resample and channel-convert the loopback audio to match AudioGraph format 647 if (!rawLoopbackSamples.empty()) 648 { 649 AppendResampledLoopbackSamples(rawLoopbackSamples); 650 } 651 } 652 653 // Determine the actual number of samples we'll output 654 // Use mic sample count if mic is enabled 655 uint32_t outputSampleCount = m_captureMicrophone ? numMicSamples : expectedSamplesPerQuantum; 656 657 // If microphone is disabled, create a buffer with only loopback audio 658 if (!m_captureMicrophone && outputSampleCount > 0) 659 { 660 // Create a buffer filled with loopback audio or silence 661 std::vector<uint8_t> outputData(outputSampleCount * sizeof(float), 0); 662 float* outputFloats = reinterpret_cast<float*>(outputData.data()); 663 664 { 665 auto loopbackLock = m_loopbackBufferLock.lock_exclusive(); 666 uint32_t samplesToUse = min(outputSampleCount, static_cast<uint32_t>(m_loopbackBuffer.size())); 667 668 for (uint32_t i = 0; i < samplesToUse; i++) 669 { 670 float sample = m_loopbackBuffer[i]; 671 if (sample > 1.0f) sample = 1.0f; 672 else if (sample < -1.0f) sample = -1.0f; 673 outputFloats[i] = sample; 674 } 675 676 if (samplesToUse > 0) 677 { 678 m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + samplesToUse); 679 } 680 } 681 682 // Create a new buffer with our loopback data 683 sampleBuffer = winrt::Buffer(outputSampleCount * sizeof(float)); 684 memcpy(sampleBuffer.data(), outputData.data(), outputData.size()); 685 sampleBuffer.Length(static_cast<uint32_t>(outputData.size())); 686 } 687 else if (m_captureMicrophone && numMicSamples > 0) 688 { 689 // Mix loopback into mic samples 690 auto loopbackLock = m_loopbackBufferLock.lock_exclusive(); 691 float* bufferData = reinterpret_cast<float*>(sampleBuffer.data()); 692 uint32_t samplesToMix = min(numMicSamples, static_cast<uint32_t>(m_loopbackBuffer.size())); 693 694 for (uint32_t i = 0; i < samplesToMix; i++) 695 { 696 float mixed = bufferData[i] + m_loopbackBuffer[i]; 697 if (mixed > 1.0f) mixed = 1.0f; 698 else if (mixed < -1.0f) mixed = -1.0f; 699 bufferData[i] = mixed; 700 } 701 702 if (samplesToMix > 0) 703 { 704 m_loopbackBuffer.erase(m_loopbackBuffer.begin(), m_loopbackBuffer.begin() + samplesToMix); 705 } 706 } 707 708 if (sampleBuffer.Length() > 0) 709 { 710 auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, timestamp.value()); 711 m_samples.push_back(sample); 712 713 const uint32_t sampleCount = sampleBuffer.Length() / sizeof(float); 714 const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0; 715 const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0; 716 m_lastSampleTimestamp = timestamp.value(); 717 m_lastSampleDuration = winrt::TimeSpan{ durationTicks }; 718 m_hasLastSampleTimestamp = true; 719 } 720 } 721 m_audioEvent.SetEvent(); 722 }