// MonsterMaskVoiceChanger/pdmvoice.ino
1 // SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries 2 // 3 // SPDX-License-Identifier: MIT 4 5 // Basic voice changer code. This version is specific to the Adafruit 6 // MONSTER M4SK board using a PDM microphone. 7 8 #include <SPI.h> 9 10 #define MIN_PITCH_HZ 65 11 #define MAX_PITCH_HZ 1600 12 #define TYP_PITCH_HZ 175 13 14 // Playback timer stuff - use TC3 on MONSTER M4SK (no TC4 on this board) 15 #define TIMER TC3 16 #define TIMER_IRQN TC3_IRQn 17 #define TIMER_IRQ_HANDLER TC3_Handler 18 #define TIMER_GCLK_ID TC3_GCLK_ID 19 #define TIMER_GCM_ID GCM_TC2_TC3 20 21 // PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical). 22 // SPI native max is is 24 MHz, so available speeds are 12, 6, 3 MHz. 23 #define SPI_BITRATE 3000000 24 static SPISettings settings(SPI_BITRATE, LSBFIRST, SPI_MODE0); 25 // 3 MHz / 32 bits = 93,750 Hz interrupt frequency 26 // 2 interrupts/sample = 46,875 Hz audio sample rate 27 const float sampleRate = (float)SPI_BITRATE / 64.0; 28 // sampleRate is float in case factors change to make it not divide evenly. 29 // It DOES NOT CHANGE over time, only playbackRate does. 30 31 // Although SPI lib now has an option to get an SPI object's SERCOM number 32 // at run time, the interrupt handler MUST be declared at compile time... 33 // so it's necessary to know the SERCOM # ahead of time anyway, oh well. 
34 #define PDM_SERCOM SERCOM3 // PDM mic SPI SERCOM on MONSTER M4SK 35 #define PDM_SPI SPI2 // PDM mic SPI peripheral 36 #define PDM_SERCOM_HANDLER SERCOM3_0_Handler 37 #define PDM_SERCOM_IRQn SERCOM3_0_IRQn // _0_IRQn is DRE interrupt 38 39 static Sercom *sercom; 40 static volatile uint32_t *dataReg; 41 42 Sercom * const sercomList[] = { 43 SERCOM0, SERCOM1, SERCOM2, SERCOM3, 44 #if defined(SERCOM4) 45 SERCOM4, 46 #endif 47 #if defined(SERCOM5) 48 SERCOM5, 49 #endif 50 #if defined(SERCOM6) 51 SERCOM6, 52 #endif 53 #if defined(SERCOM7) 54 SERCOM7, 55 #endif 56 }; 57 58 static float playbackRate = sampleRate; 59 static uint16_t *recBuf = NULL; 60 // recBuf currently gets allocated (in voiceSetup()) for two full cycles of 61 // the lowest pitch we're likely to encounter. Right now it doesn't really 62 // NEED to be this size, but if pitch detection is added in the future then 63 // this'll become more useful. 64 // 46,875 sampling rate from mic, 65 Hz lowest pitch -> 2884 bytes. 65 static const uint16_t recBufSize = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5); 66 static int16_t recIndex = 0; 67 static int16_t playbackIndex = 0; 68 69 volatile uint16_t voiceLastReading = 0; 70 71 #define DC_PERIOD 4096 // Recalculate DC offset this many samplings 72 // DC_PERIOD does NOT need to be a power of 2, but might save a few cycles. 73 // PDM rate is 46875, so 4096 = 11.44 times/sec 74 static uint16_t dcCounter = 0; // Rolls over every DC_PERIOD samples 75 static uint32_t dcSum = 0; // Accumulates DC_PERIOD samples 76 static uint16_t dcOffsetPrior = 32768; // DC offset interpolates linearly 77 static uint16_t dcOffsetNext = 32768; // between these two values 78 79 static uint16_t micGain = 256; // 1:1 80 81 // Just playing back directly from the recording circular buffer produces 82 // audible clicks as the waveforms rarely align at the beginning and end of 83 // the buffer. 
So what we do is advance or push back the playback index a 84 // certain amount when it's likely to overtake or underflow the recording 85 // index, and interpolate from the current to the jumped-forward-or-back 86 // readings over a short period. In a perfect world, that "certain amount" 87 // would be one wavelength of the current voice pitch...BUT...with no pitch 88 // detecton currently, we instead use a fixed middle-of-the-road value: 89 // TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken 90 // vocal range and a bit above typical male spoken range. This all goes out 91 // the window with singing, and of course young people will have a higher 92 // speech range, is just a crude catch-all approximation. 93 static const uint16_t jump = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5); 94 static const uint16_t interp = jump / 4; // Interp time = 1/4 waveform 95 static bool jumping = false; 96 static uint16_t jumpCount = 1; 97 static int16_t jumpThreshold; 98 static int16_t playbackIndexJumped; 99 static uint16_t nextOut = 2048; 100 101 float voicePitch(float p); 102 103 // START PITCH SHIFT (no arguments) ---------------------------------------- 104 105 bool voiceSetup(void) { 106 107 // Allocate circular buffer for audio 108 if(NULL == (recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t)))) { 109 return false; // Fail 110 } 111 112 // Set up PDM microphone input ------------------------------------------- 113 114 PDM_SPI.begin(); 115 PDM_SPI.beginTransaction(settings); // this SPI transaction is left open 116 sercom = sercomList[PDM_SPI.getSercomIndex()]; 117 dataReg = PDM_SPI.getDataRegister(); 118 119 // Enabling 32-bit SPI must be done AFTER SPI.begin() which 120 // resets registers. But SPI.CTRLC (where 32-bit mode is set) is 121 // enable-protected, so peripheral must be disabled temporarily... 
122 sercom->SPI.CTRLA.bit.ENABLE = 0; // Disable SPI 123 while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for disable 124 sercom->SPI.CTRLC.bit.DATA32B = 1; // Enable 32-bit mode 125 sercom->SPI.CTRLA.bit.ENABLE = 1; // Re-enable SPI 126 while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for enable 127 // 4-byte word length is implicit in 32-bit mode, 128 // no need to set up LENGTH register. 129 130 sercom->SPI.INTENSET.bit.DRE = 1; // Data-register-empty interrupt 131 NVIC_DisableIRQ(PDM_SERCOM_IRQn); 132 NVIC_ClearPendingIRQ(PDM_SERCOM_IRQn); 133 NVIC_SetPriority(PDM_SERCOM_IRQn, 0); // Top priority 134 NVIC_EnableIRQ(PDM_SERCOM_IRQn); 135 136 sercom->SPI.DATA.bit.DATA = 0; // Kick off SPI free-run 137 138 // Set up analog output & timer ------------------------------------------ 139 140 analogWriteResolution(12); 141 142 // Feed TIMER off GCLK1 (already set to 48 MHz by Arduino core) 143 GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN = 0; // Disable channel 144 while(GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN); // Wait for disable 145 GCLK_PCHCTRL_Type pchctrl; 146 pchctrl.bit.GEN = GCLK_PCHCTRL_GEN_GCLK1_Val; 147 pchctrl.bit.CHEN = 1; 148 GCLK->PCHCTRL[TIMER_GCLK_ID].reg = pchctrl.reg; 149 while(!GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN); // Wait for enable 150 151 // Disable timer before configuring it 152 TIMER->COUNT16.CTRLA.bit.ENABLE = 0; 153 while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE); 154 155 // 16-bit counter mode, 1:1 prescale, match-frequency generation mode 156 TIMER->COUNT16.CTRLA.bit.MODE = TC_CTRLA_MODE_COUNT16; 157 TIMER->COUNT16.CTRLA.bit.PRESCALER = TC_CTRLA_PRESCALER_DIV1_Val; 158 TIMER->COUNT16.WAVE.bit.WAVEGEN = TC_WAVE_WAVEGEN_MFRQ_Val; 159 160 TIMER->COUNT16.CTRLBCLR.reg = TC_CTRLBCLR_DIR; // Count up 161 while(TIMER->COUNT16.SYNCBUSY.bit.CTRLB); 162 163 voicePitch(1.0); // Set timer interval 164 165 TIMER->COUNT16.INTENSET.reg = TC_INTENSET_OVF; // Overflow interrupt 166 NVIC_DisableIRQ(TIMER_IRQN); 167 NVIC_ClearPendingIRQ(TIMER_IRQN); 168 
NVIC_SetPriority(TIMER_IRQN, 0); // Top priority 169 NVIC_EnableIRQ(TIMER_IRQN); 170 171 TIMER->COUNT16.CTRLA.bit.ENABLE = 1; // Enable timer 172 while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE); // Wait for it 173 174 return true; // Success 175 } 176 177 // SET PITCH --------------------------------------------------------------- 178 179 // Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf 180 // 0.5 = halve frequency (1 octave down) 181 // 1.0 = normal playback 182 // 2.0 = double frequency (1 octave up) 183 // Available pitch adjustment range depends on various hardware factors 184 // (SPI speed, timer/counter resolution, etc.), and the actual pitch 185 // adjustment (after appying constraints) will be returned. 186 float voicePitch(float p) { 187 float desiredPlaybackRate = sampleRate * p; 188 int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate + 0.5); 189 if(period > 2500) period = 2500; // Hard limit is 65536, 2.5K is a practical limit 190 else if(period < 250) period = 250; // Leave some cycles for IRQ handler 191 TIMER->COUNT16.CC[0].reg = period - 1; 192 while(TIMER->COUNT16.SYNCBUSY.bit.CC0); 193 float actualPlaybackRate = 48000000.0 / (float)period; 194 p = (actualPlaybackRate / sampleRate); // New pitch 195 jumpThreshold = (int)(jump * p + 0.5); 196 return p; 197 } 198 199 // SET GAIN ---------------------------------------------------------------- 200 201 void voiceGain(float g) { 202 if(g >= (65535.0/256.0)) micGain = 65535; 203 else if(g < 0.0) micGain = 0; 204 else micGain = (uint16_t)(g * 256.0 + 0.5); 205 } 206 207 // INTERRUPT HANDLERS ------------------------------------------------------ 208 209 static uint16_t const sincfilter[64] = { 0, 2, 9, 21, 39, 63, 94, 132, 179, 236, 302, 379, 467, 565, 674, 792, 920, 1055, 1196, 1341, 1487, 1633, 1776, 1913, 2042, 2159, 2263, 2352, 2422, 2474, 2506, 2516, 2506, 2474, 2422, 2352, 2263, 2159, 2042, 1913, 1776, 1633, 1487, 1341, 1196, 1055, 920, 792, 674, 565, 467, 379, 302, 236, 179, 
132, 94, 63, 39, 21, 9, 2, 0, 0 }; 210 211 void PDM_SERCOM_HANDLER(void) { 212 static bool evenWord = 1; // Alternates 0/1 with each interrupt call 213 static uint32_t sumTemp = 0; // Temp. value used across 2 interrupt calls 214 // Shenanigans: SPI data read/write are shadowed...even though it appears 215 // the same register here, it's legit to write new MOSI value before 216 // reading the received MISO value from the same location. This helps 217 // avoid a gap between words...provides a steady stream of bits. 218 *dataReg = 0; // Write clears DRE flag, starts next xfer 219 uint32_t sample = *dataReg; // Read last-received word 220 221 uint32_t sum = 0; // local var = register = faster than sumTemp 222 if(evenWord) { // Even-numbered 32-bit word... 223 // At default speed and optimization settings (120 MHz -Os), the PDM- 224 // servicing interrupt consumes about 12.5% of CPU time. Though this 225 // code looks bulky, it's actually reasonably efficient (sincfilter[] is 226 // const, so these compile down to constants, there is no array lookup, 227 // any any zero-value element refs will be removed by the compiler). 228 // Tested MANY methods and this was hard to beat. One managed just under 229 // 10% load, but required 4KB of tables...not worth it for small boost. 230 // Can get an easy boost with overclock and optimizer tweaks. 
231 if(sample & 0x00000001) sum += sincfilter[ 0]; 232 if(sample & 0x00000002) sum += sincfilter[ 1]; 233 if(sample & 0x00000004) sum += sincfilter[ 2]; 234 if(sample & 0x00000008) sum += sincfilter[ 3]; 235 if(sample & 0x00000010) sum += sincfilter[ 4]; 236 if(sample & 0x00000020) sum += sincfilter[ 5]; 237 if(sample & 0x00000040) sum += sincfilter[ 6]; 238 if(sample & 0x00000080) sum += sincfilter[ 7]; 239 if(sample & 0x00000100) sum += sincfilter[ 8]; 240 if(sample & 0x00000200) sum += sincfilter[ 9]; 241 if(sample & 0x00000400) sum += sincfilter[10]; 242 if(sample & 0x00000800) sum += sincfilter[11]; 243 if(sample & 0x00001000) sum += sincfilter[12]; 244 if(sample & 0x00002000) sum += sincfilter[13]; 245 if(sample & 0x00004000) sum += sincfilter[14]; 246 if(sample & 0x00008000) sum += sincfilter[15]; 247 if(sample & 0x00010000) sum += sincfilter[16]; 248 if(sample & 0x00020000) sum += sincfilter[17]; 249 if(sample & 0x00040000) sum += sincfilter[18]; 250 if(sample & 0x00080000) sum += sincfilter[19]; 251 if(sample & 0x00100000) sum += sincfilter[20]; 252 if(sample & 0x00200000) sum += sincfilter[21]; 253 if(sample & 0x00400000) sum += sincfilter[22]; 254 if(sample & 0x00800000) sum += sincfilter[23]; 255 if(sample & 0x01000000) sum += sincfilter[24]; 256 if(sample & 0x02000000) sum += sincfilter[25]; 257 if(sample & 0x04000000) sum += sincfilter[26]; 258 if(sample & 0x08000000) sum += sincfilter[27]; 259 if(sample & 0x10000000) sum += sincfilter[28]; 260 if(sample & 0x20000000) sum += sincfilter[29]; 261 if(sample & 0x40000000) sum += sincfilter[30]; 262 if(sample & 0x80000000) sum += sincfilter[31]; 263 sumTemp = sum; // Copy register to static var for next call 264 } else { 265 if(sample & 0x00000001) sum += sincfilter[32]; 266 if(sample & 0x00000002) sum += sincfilter[33]; 267 if(sample & 0x00000004) sum += sincfilter[34]; 268 if(sample & 0x00000008) sum += sincfilter[35]; 269 if(sample & 0x00000010) sum += sincfilter[36]; 270 if(sample & 0x00000020) sum += 
sincfilter[37]; 271 if(sample & 0x00000040) sum += sincfilter[38]; 272 if(sample & 0x00000080) sum += sincfilter[39]; 273 if(sample & 0x00000100) sum += sincfilter[40]; 274 if(sample & 0x00000200) sum += sincfilter[41]; 275 if(sample & 0x00000400) sum += sincfilter[42]; 276 if(sample & 0x00000800) sum += sincfilter[43]; 277 if(sample & 0x00001000) sum += sincfilter[44]; 278 if(sample & 0x00002000) sum += sincfilter[45]; 279 if(sample & 0x00004000) sum += sincfilter[46]; 280 if(sample & 0x00008000) sum += sincfilter[47]; 281 if(sample & 0x00010000) sum += sincfilter[48]; 282 if(sample & 0x00020000) sum += sincfilter[49]; 283 if(sample & 0x00040000) sum += sincfilter[50]; 284 if(sample & 0x00080000) sum += sincfilter[51]; 285 if(sample & 0x00100000) sum += sincfilter[52]; 286 if(sample & 0x00200000) sum += sincfilter[53]; 287 if(sample & 0x00400000) sum += sincfilter[54]; 288 if(sample & 0x00800000) sum += sincfilter[55]; 289 if(sample & 0x01000000) sum += sincfilter[56]; 290 if(sample & 0x02000000) sum += sincfilter[57]; 291 if(sample & 0x04000000) sum += sincfilter[58]; 292 if(sample & 0x08000000) sum += sincfilter[59]; 293 if(sample & 0x10000000) sum += sincfilter[60]; 294 if(sample & 0x20000000) sum += sincfilter[61]; 295 if(sample & 0x40000000) sum += sincfilter[62]; 296 if(sample & 0x80000000) sum += sincfilter[63]; 297 sum += sumTemp; // Add static var from last call 298 299 // 'sum' is new raw audio value -- process it -------------------------- 300 301 uint16_t dcOffset; 302 303 dcSum += sum; // Accumulate long-term average for DC offset correction 304 if(++dcCounter < DC_PERIOD) { 305 // Interpolate between dcOffsetPrior and dcOffsetNext 306 dcOffset = dcOffsetPrior + (dcOffsetNext - dcOffsetPrior) * dcCounter / DC_PERIOD; 307 } else { 308 // End of period reached, move 'next' to 'previous,' calc new 'next' from avg 309 dcOffsetPrior = dcOffset = dcOffsetNext; 310 dcOffsetNext = dcSum / DC_PERIOD; 311 dcCounter = dcSum = 0; 312 } 313 314 // Adjust raw 
reading by DC offset to center (ish) it, scale by mic gain 315 int32_t adjusted = ((int32_t)sum - dcOffset) * micGain / 256; 316 317 // Go back to uint16_t space and clip to 16-bit range 318 adjusted += 32768; 319 if(adjusted > 65535) adjusted = 65535; 320 else if(adjusted < 0) adjusted = 0; 321 322 // So, the theory is, in the future some basic pitch detection could be 323 // added right about here, which could be used to improve the seam 324 // transitions in the playback interrupt (and possibly other things, 325 // like dynamic adjustment of the playback rate to do monotone and other 326 // effects). Actual usable pitch detection on speech turns out to be One 327 // Of Those Nearly Insurmountable Problems In Audio Processing...if 328 // you're thinking "oh just count the zero crossings" "just use an FFT" 329 // it's really not that simple, trust me, please, I've been reading 330 // everything on this, speech waveforms are jerks. Had the beginnings of 331 // some "maybe good enough approximation for a hacky microcontroller 332 // project" code here, but it's pulled out for now for the sake of 333 // getting something not-broken in folks' hands in a sensible timeframe. 334 if(++recIndex >= recBufSize) recIndex = 0; 335 recBuf[recIndex] = adjusted; 336 337 // Outside code can use the value of voiceLastReading if you want to 338 // do an approximate live waveform display, or dynamic gain adjustment 339 // based on mic input, or other stuff. This won't give you every single 340 // sample in the recording buffer one-by-one sequentially...it's just 341 // the last thing that was stored prior to whatever time you polled it, 342 // but may still have some uses. 
343 voiceLastReading = adjusted; 344 } 345 evenWord ^= 1; 346 } 347 348 // Playback timer interrupt 349 void TIMER_IRQ_HANDLER(void) { 350 TIMER->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF; 351 352 // Do analog writes pronto so output timing is consistent 353 analogWrite(A0, nextOut); 354 analogWrite(A1, nextOut); 355 // Then we can take whatever variable time for processing the next cycle... 356 357 if(++playbackIndex >= recBufSize) playbackIndex = 0; 358 359 if(jumping) { 360 // A waveform-blending transition is in-progress 361 uint32_t w1 = 65536UL * jumpCount / jump, // ramp playbackIndexJumped up (14 bits) 362 w2 = 65536UL - w1; // ramp playbackIndex down (14 bits) 363 nextOut = (recBuf[playbackIndexJumped] * w1 + recBuf[playbackIndex] * w2) >> 20; // 28 bit result->12 bits 364 if(++jumpCount >= jump) { 365 playbackIndex = playbackIndexJumped; 366 jumpCount = 1; 367 jumping = false; 368 } else { 369 if(++playbackIndexJumped >= recBufSize) playbackIndexJumped = 0; 370 } 371 } else { 372 nextOut = recBuf[playbackIndex] >> 4; // 16->12 bit 373 if(playbackRate >= sampleRate) { // Sped up 374 // Playback may overtake recording, need to back off periodically 375 int16_t dist = (recIndex >= playbackIndex) ? 376 (recIndex - playbackIndex) : (recBufSize - (playbackIndex - recIndex)); 377 if(dist <= jumpThreshold) { 378 playbackIndexJumped = playbackIndex - jump; 379 if(playbackIndexJumped < 0) playbackIndexJumped += recBufSize; 380 jumping = true; 381 } 382 } else { // Slowed down 383 // Playback may underflow recording, need to advance periodically 384 int16_t dist = (playbackIndex >= recIndex) ? 385 (playbackIndex - recIndex) : (recBufSize - (recIndex - playbackIndex)); 386 if(dist <= jumpThreshold) { 387 playbackIndexJumped = (playbackIndex + jump) % recBufSize; 388 jumping = true; 389 } 390 } 391 } 392 }