// MonsterMaskVoiceChanger / pdmvoice.ino
  1  // SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries
  2  //
  3  // SPDX-License-Identifier: MIT
  4  
  5  // Basic voice changer code. This version is specific to the Adafruit
  6  // MONSTER M4SK board using a PDM microphone.
  7  
  8  #include <SPI.h>
  9  
// Voice pitch figures, in Hz. MIN_PITCH_HZ sizes the recording buffer and
// TYP_PITCH_HZ sets the playback jump distance (both further down).
// NOTE(review): MAX_PITCH_HZ is not referenced in the code visible here.
#define MIN_PITCH_HZ   65
#define MAX_PITCH_HZ 1600
#define TYP_PITCH_HZ  175

// Playback timer stuff - use TC3 on MONSTER M4SK (no TC4 on this board)
// All timer references below go through these names so the peripheral
// choice lives in one place.
// NOTE(review): TIMER_GCM_ID is not referenced in the code visible here.
#define TIMER             TC3
#define TIMER_IRQN        TC3_IRQn
#define TIMER_IRQ_HANDLER TC3_Handler
#define TIMER_GCLK_ID     TC3_GCLK_ID
#define TIMER_GCM_ID      GCM_TC2_TC3
 20  
// PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical).
// SPI native max is 24 MHz, so available speeds are 12, 6, 3 MHz.
#define SPI_BITRATE 3000000
// The mic is clocked as an SPI device. LSBFIRST matters: the mic interrupt
// handler weights bit 0 of each received word with the first filter tap.
static SPISettings settings(SPI_BITRATE, LSBFIRST, SPI_MODE0);
// 3 MHz / 32 bits = 93,750 Hz interrupt frequency
// 2 interrupts/sample = 46,875 Hz audio sample rate
// (hence the divide by 64: one audio sample per 64 PDM bits)
const float sampleRate = (float)SPI_BITRATE / 64.0;
// sampleRate is float in case factors change to make it not divide evenly.
// It DOES NOT CHANGE over time, only playbackRate does.
 30  
// Although SPI lib now has an option to get an SPI object's SERCOM number
// at run time, the interrupt handler MUST be declared at compile time...
// so it's necessary to know the SERCOM # ahead of time anyway, oh well.
// NOTE(review): PDM_SERCOM itself is not referenced in the code visible
// here; voiceSetup() looks the peripheral up through PDM_SPI instead.
#define PDM_SERCOM         SERCOM3 // PDM mic SPI SERCOM on MONSTER M4SK
#define PDM_SPI            SPI2    // PDM mic SPI peripheral
#define PDM_SERCOM_HANDLER SERCOM3_0_Handler
#define PDM_SERCOM_IRQn    SERCOM3_0_IRQn // _0_IRQn is DRE interrupt

// Both are filled in by voiceSetup(): 'sercom' is the register block of the
// SERCOM behind PDM_SPI, 'dataReg' is its data register, which the mic
// interrupt handler both writes and reads.
static Sercom            *sercom;
static volatile uint32_t *dataReg;
 41  
// Lookup table from SERCOM index to peripheral register block. voiceSetup()
// indexes it with PDM_SPI.getSercomIndex(). SERCOM4 and up are only listed
// when the target device's headers define them.
Sercom * const sercomList[] = {
  SERCOM0, SERCOM1, SERCOM2, SERCOM3,
#if defined(SERCOM4)
  SERCOM4,
#endif
#if defined(SERCOM5)
  SERCOM5,
#endif
#if defined(SERCOM6)
  SERCOM6,
#endif
#if defined(SERCOM7)
  SERCOM7,
#endif
};
 57  
// Playback sample rate in Hz. The playback interrupt compares this against
// sampleRate to choose between its "sped up" and "slowed down" handling.
static float          playbackRate     = sampleRate;
// Circular buffer of processed mic samples: unsigned 16-bit values centered
// on 32768. Written by the mic interrupt, read by the playback interrupt.
static uint16_t      *recBuf           = NULL;
// recBuf currently gets allocated (in voiceSetup()) for two full cycles of
// the lowest pitch we're likely to encounter. Right now it doesn't really
// NEED to be this size, but if pitch detection is added in the future then
// this'll become more useful.
// 46,875 sampling rate from mic, 65 Hz lowest pitch -> 2884 bytes.
// (recBufSize itself is a count of 16-bit samples: 1442.)
static const uint16_t recBufSize       = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5);
static int16_t        recIndex         = 0; // Last recBuf slot written by the mic interrupt
static int16_t        playbackIndex    = 0; // recBuf slot being played back

// Most recent processed mic sample, published for code outside this file.
volatile uint16_t     voiceLastReading = 0;

#define DC_PERIOD     4096 // Recalculate DC offset this many samplings
// DC_PERIOD does NOT need to be a power of 2, but might save a few cycles.
// PDM rate is 46875, so 4096 = 11.44 times/sec
static uint16_t       dcCounter        = 0;     // Rolls over every DC_PERIOD samples
static uint32_t       dcSum            = 0;     // Accumulates DC_PERIOD samples
static uint16_t       dcOffsetPrior    = 32768; // DC offset interpolates linearly
static uint16_t       dcOffsetNext     = 32768; // between these two values

// Mic gain as a fixed-point multiplier with 8 fractional bits; set through
// voiceGain().
static uint16_t       micGain          = 256;   // 1:1

// Just playing back directly from the recording circular buffer produces
// audible clicks as the waveforms rarely align at the beginning and end of
// the buffer. So what we do is advance or push back the playback index a
// certain amount when it's likely to overtake or underflow the recording
// index, and interpolate from the current to the jumped-forward-or-back
// readings over a short period. In a perfect world, that "certain amount"
// would be one wavelength of the current voice pitch...BUT...with no pitch
// detection currently, we instead use a fixed middle-of-the-road value:
// TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken
// vocal range and a bit above typical male spoken range. This all goes out
// the window with singing, and of course young people will have a higher
// speech range, is just a crude catch-all approximation.
static const uint16_t jump      = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5);
// NOTE(review): 'interp' is not referenced in the code visible here; the
// playback interrupt blends over 'jump' samples instead.
static const uint16_t interp    = jump / 4; // Interp time = 1/4 waveform
static bool           jumping   = false;    // True while a blend is in progress
static uint16_t       jumpCount = 1;        // Position within the current blend
static int16_t        jumpThreshold;        // Index distance that triggers a jump; set by voicePitch()
static int16_t        playbackIndexJumped;  // Read position of the stream being blended in
static uint16_t       nextOut   = 2048;     // Next 12-bit DAC value (2048 = mid-scale)

float voicePitch(float p);
102  
103  // START PITCH SHIFT (no arguments) ----------------------------------------
104  
105  bool voiceSetup(void) {
106  
107    // Allocate circular buffer for audio
108    if(NULL == (recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t)))) {
109      return false; // Fail
110    }
111  
112    // Set up PDM microphone input -------------------------------------------
113  
114    PDM_SPI.begin();
115    PDM_SPI.beginTransaction(settings); // this SPI transaction is left open
116    sercom  = sercomList[PDM_SPI.getSercomIndex()];
117    dataReg = PDM_SPI.getDataRegister();
118  
119    // Enabling 32-bit SPI must be done AFTER SPI.begin() which
120    // resets registers. But SPI.CTRLC (where 32-bit mode is set) is
121    // enable-protected, so peripheral must be disabled temporarily...
122    sercom->SPI.CTRLA.bit.ENABLE  = 0;      // Disable SPI
123    while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for disable
124    sercom->SPI.CTRLC.bit.DATA32B = 1;      // Enable 32-bit mode
125    sercom->SPI.CTRLA.bit.ENABLE  = 1;      // Re-enable SPI
126    while(sercom->SPI.SYNCBUSY.bit.ENABLE); // Wait for enable
127    // 4-byte word length is implicit in 32-bit mode,
128    // no need to set up LENGTH register.
129  
130    sercom->SPI.INTENSET.bit.DRE  = 1;      // Data-register-empty interrupt
131    NVIC_DisableIRQ(PDM_SERCOM_IRQn);
132    NVIC_ClearPendingIRQ(PDM_SERCOM_IRQn);
133    NVIC_SetPriority(PDM_SERCOM_IRQn, 0);   // Top priority
134    NVIC_EnableIRQ(PDM_SERCOM_IRQn);
135  
136    sercom->SPI.DATA.bit.DATA     = 0;      // Kick off SPI free-run
137  
138    // Set up analog output & timer ------------------------------------------
139  
140    analogWriteResolution(12);
141  
142    // Feed TIMER off GCLK1 (already set to 48 MHz by Arduino core)
143    GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN = 0;     // Disable channel
144    while(GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN);  // Wait for disable
145    GCLK_PCHCTRL_Type pchctrl;
146    pchctrl.bit.GEN                       = GCLK_PCHCTRL_GEN_GCLK1_Val;
147    pchctrl.bit.CHEN                      = 1;
148    GCLK->PCHCTRL[TIMER_GCLK_ID].reg      = pchctrl.reg;
149    while(!GCLK->PCHCTRL[TIMER_GCLK_ID].bit.CHEN); // Wait for enable
150  
151    // Disable timer before configuring it
152    TIMER->COUNT16.CTRLA.bit.ENABLE       = 0;
153    while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE);
154  
155    // 16-bit counter mode, 1:1 prescale, match-frequency generation mode
156    TIMER->COUNT16.CTRLA.bit.MODE      = TC_CTRLA_MODE_COUNT16;
157    TIMER->COUNT16.CTRLA.bit.PRESCALER = TC_CTRLA_PRESCALER_DIV1_Val;
158    TIMER->COUNT16.WAVE.bit.WAVEGEN    = TC_WAVE_WAVEGEN_MFRQ_Val;
159  
160    TIMER->COUNT16.CTRLBCLR.reg        = TC_CTRLBCLR_DIR; // Count up
161    while(TIMER->COUNT16.SYNCBUSY.bit.CTRLB);
162  
163    voicePitch(1.0); // Set timer interval
164  
165    TIMER->COUNT16.INTENSET.reg        = TC_INTENSET_OVF; // Overflow interrupt
166    NVIC_DisableIRQ(TIMER_IRQN);
167    NVIC_ClearPendingIRQ(TIMER_IRQN);
168    NVIC_SetPriority(TIMER_IRQN, 0); // Top priority
169    NVIC_EnableIRQ(TIMER_IRQN);
170  
171    TIMER->COUNT16.CTRLA.bit.ENABLE    = 1;    // Enable timer
172    while(TIMER->COUNT16.SYNCBUSY.bit.ENABLE); // Wait for it
173  
174    return true; // Success
175  }
176  
177  // SET PITCH ---------------------------------------------------------------
178  
179  // Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf
180  // 0.5 = halve frequency (1 octave down)
181  // 1.0 = normal playback
182  // 2.0 = double frequency (1 octave up)
183  // Available pitch adjustment range depends on various hardware factors
184  // (SPI speed, timer/counter resolution, etc.), and the actual pitch
185  // adjustment (after appying constraints) will be returned.
186  float voicePitch(float p) {
187    float   desiredPlaybackRate = sampleRate * p;
188    int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate + 0.5);
189    if(period > 2500)     period = 2500; // Hard limit is 65536, 2.5K is a practical limit
190    else if(period < 250) period =  250; // Leave some cycles for IRQ handler
191    TIMER->COUNT16.CC[0].reg = period - 1;
192    while(TIMER->COUNT16.SYNCBUSY.bit.CC0);
193    float   actualPlaybackRate = 48000000.0 / (float)period;
194    p = (actualPlaybackRate / sampleRate); // New pitch
195    jumpThreshold = (int)(jump * p + 0.5);
196    return p;
197  }
198  
199  // SET GAIN ----------------------------------------------------------------
200  
201  void voiceGain(float g) {
202    if(g >= (65535.0/256.0)) micGain = 65535;
203    else if(g < 0.0)         micGain = 0;
204    else                     micGain = (uint16_t)(g * 256.0 + 0.5);
205  }
206  
207  // INTERRUPT HANDLERS ------------------------------------------------------
208  
// Weights for the 64 PDM bits that make up one audio sample; the mic
// interrupt adds sincfilter[i] for every bit i that is set. The table is
// symmetric about element 31 (the final element is a padding zero) and its
// elements total 65502, so even an all-ones input sums to a value that fits
// in 16 bits.
static const uint16_t sincfilter[64] = {
     0,    2,    9,   21,   39,   63,   94,  132,
   179,  236,  302,  379,  467,  565,  674,  792,
   920, 1055, 1196, 1341, 1487, 1633, 1776, 1913,
  2042, 2159, 2263, 2352, 2422, 2474, 2506, 2516,
  2506, 2474, 2422, 2352, 2263, 2159, 2042, 1913,
  1776, 1633, 1487, 1341, 1196, 1055,  920,  792,
   674,  565,  467,  379,  302,  236,  179,  132,
    94,   63,   39,   21,    9,    2,    0,    0
};
210  
211  void PDM_SERCOM_HANDLER(void) {
212    static bool     evenWord = 1; // Alternates 0/1 with each interrupt call
213    static uint32_t sumTemp  = 0; // Temp. value used across 2 interrupt calls
214    // Shenanigans: SPI data read/write are shadowed...even though it appears
215    // the same register here, it's legit to write new MOSI value before
216    // reading the received MISO value from the same location. This helps
217    // avoid a gap between words...provides a steady stream of bits.
218    *dataReg = 0;               // Write clears DRE flag, starts next xfer
219    uint32_t sample = *dataReg; // Read last-received word
220  
221    uint32_t sum = 0;  // local var = register = faster than sumTemp
222    if(evenWord) {     // Even-numbered 32-bit word...
223      // At default speed and optimization settings (120 MHz -Os), the PDM-
224      // servicing interrupt consumes about 12.5% of CPU time. Though this
225      // code looks bulky, it's actually reasonably efficient (sincfilter[] is
226      // const, so these compile down to constants, there is no array lookup,
227      // any any zero-value element refs will be removed by the compiler).
228      // Tested MANY methods and this was hard to beat. One managed just under
229      // 10% load, but required 4KB of tables...not worth it for small boost.
230      // Can get an easy boost with overclock and optimizer tweaks.
231      if(sample & 0x00000001) sum += sincfilter[ 0];
232      if(sample & 0x00000002) sum += sincfilter[ 1];
233      if(sample & 0x00000004) sum += sincfilter[ 2];
234      if(sample & 0x00000008) sum += sincfilter[ 3];
235      if(sample & 0x00000010) sum += sincfilter[ 4];
236      if(sample & 0x00000020) sum += sincfilter[ 5];
237      if(sample & 0x00000040) sum += sincfilter[ 6];
238      if(sample & 0x00000080) sum += sincfilter[ 7];
239      if(sample & 0x00000100) sum += sincfilter[ 8];
240      if(sample & 0x00000200) sum += sincfilter[ 9];
241      if(sample & 0x00000400) sum += sincfilter[10];
242      if(sample & 0x00000800) sum += sincfilter[11];
243      if(sample & 0x00001000) sum += sincfilter[12];
244      if(sample & 0x00002000) sum += sincfilter[13];
245      if(sample & 0x00004000) sum += sincfilter[14];
246      if(sample & 0x00008000) sum += sincfilter[15];
247      if(sample & 0x00010000) sum += sincfilter[16];
248      if(sample & 0x00020000) sum += sincfilter[17];
249      if(sample & 0x00040000) sum += sincfilter[18];
250      if(sample & 0x00080000) sum += sincfilter[19];
251      if(sample & 0x00100000) sum += sincfilter[20];
252      if(sample & 0x00200000) sum += sincfilter[21];
253      if(sample & 0x00400000) sum += sincfilter[22];
254      if(sample & 0x00800000) sum += sincfilter[23];
255      if(sample & 0x01000000) sum += sincfilter[24];
256      if(sample & 0x02000000) sum += sincfilter[25];
257      if(sample & 0x04000000) sum += sincfilter[26];
258      if(sample & 0x08000000) sum += sincfilter[27];
259      if(sample & 0x10000000) sum += sincfilter[28];
260      if(sample & 0x20000000) sum += sincfilter[29];
261      if(sample & 0x40000000) sum += sincfilter[30];
262      if(sample & 0x80000000) sum += sincfilter[31];
263      sumTemp = sum; // Copy register to static var for next call
264    } else {
265      if(sample & 0x00000001) sum += sincfilter[32];
266      if(sample & 0x00000002) sum += sincfilter[33];
267      if(sample & 0x00000004) sum += sincfilter[34];
268      if(sample & 0x00000008) sum += sincfilter[35];
269      if(sample & 0x00000010) sum += sincfilter[36];
270      if(sample & 0x00000020) sum += sincfilter[37];
271      if(sample & 0x00000040) sum += sincfilter[38];
272      if(sample & 0x00000080) sum += sincfilter[39];
273      if(sample & 0x00000100) sum += sincfilter[40];
274      if(sample & 0x00000200) sum += sincfilter[41];
275      if(sample & 0x00000400) sum += sincfilter[42];
276      if(sample & 0x00000800) sum += sincfilter[43];
277      if(sample & 0x00001000) sum += sincfilter[44];
278      if(sample & 0x00002000) sum += sincfilter[45];
279      if(sample & 0x00004000) sum += sincfilter[46];
280      if(sample & 0x00008000) sum += sincfilter[47];
281      if(sample & 0x00010000) sum += sincfilter[48];
282      if(sample & 0x00020000) sum += sincfilter[49];
283      if(sample & 0x00040000) sum += sincfilter[50];
284      if(sample & 0x00080000) sum += sincfilter[51];
285      if(sample & 0x00100000) sum += sincfilter[52];
286      if(sample & 0x00200000) sum += sincfilter[53];
287      if(sample & 0x00400000) sum += sincfilter[54];
288      if(sample & 0x00800000) sum += sincfilter[55];
289      if(sample & 0x01000000) sum += sincfilter[56];
290      if(sample & 0x02000000) sum += sincfilter[57];
291      if(sample & 0x04000000) sum += sincfilter[58];
292      if(sample & 0x08000000) sum += sincfilter[59];
293      if(sample & 0x10000000) sum += sincfilter[60];
294      if(sample & 0x20000000) sum += sincfilter[61];
295      if(sample & 0x40000000) sum += sincfilter[62];
296      if(sample & 0x80000000) sum += sincfilter[63];
297      sum += sumTemp; // Add static var from last call
298  
299      // 'sum' is new raw audio value -- process it --------------------------
300  
301      uint16_t dcOffset;
302    
303      dcSum += sum; // Accumulate long-term average for DC offset correction
304      if(++dcCounter < DC_PERIOD) {
305        // Interpolate between dcOffsetPrior and dcOffsetNext
306        dcOffset = dcOffsetPrior + (dcOffsetNext - dcOffsetPrior) * dcCounter / DC_PERIOD;
307      } else {
308        // End of period reached, move 'next' to 'previous,' calc new 'next' from avg
309        dcOffsetPrior = dcOffset = dcOffsetNext;
310        dcOffsetNext  = dcSum / DC_PERIOD;
311        dcCounter     = dcSum    = 0;
312      }
313  
314      // Adjust raw reading by DC offset to center (ish) it, scale by mic gain
315      int32_t adjusted = ((int32_t)sum - dcOffset) * micGain / 256;
316  
317      // Go back to uint16_t space and clip to 16-bit range
318      adjusted += 32768;
319      if(adjusted > 65535)  adjusted = 65535;
320      else if(adjusted < 0) adjusted = 0;
321  
322      // So, the theory is, in the future some basic pitch detection could be
323      // added right about here, which could be used to improve the seam
324      // transitions in the playback interrupt (and possibly other things,
325      // like dynamic adjustment of the playback rate to do monotone and other
326      // effects). Actual usable pitch detection on speech turns out to be One
327      // Of Those Nearly Insurmountable Problems In Audio Processing...if
328      // you're thinking "oh just count the zero crossings" "just use an FFT"
329      // it's really not that simple, trust me, please, I've been reading
330      // everything on this, speech waveforms are jerks. Had the beginnings of
331      // some "maybe good enough approximation for a hacky microcontroller
332      // project" code here, but it's pulled out for now for the sake of
333      // getting something not-broken in folks' hands in a sensible timeframe.
334      if(++recIndex >= recBufSize) recIndex = 0;
335      recBuf[recIndex] = adjusted;
336  
337      // Outside code can use the value of voiceLastReading if you want to
338      // do an approximate live waveform display, or dynamic gain adjustment
339      // based on mic input, or other stuff. This won't give you every single
340      // sample in the recording buffer one-by-one sequentially...it's just
341      // the last thing that was stored prior to whatever time you polled it,
342      // but may still have some uses.
343      voiceLastReading = adjusted;
344    }
345    evenWord ^= 1;
346  }
347  
348  // Playback timer interrupt
349  void TIMER_IRQ_HANDLER(void) {
350    TIMER->COUNT16.INTFLAG.reg = TC_INTFLAG_OVF;
351  
352    // Do analog writes pronto so output timing is consistent
353    analogWrite(A0, nextOut);
354    analogWrite(A1, nextOut);
355    // Then we can take whatever variable time for processing the next cycle...
356  
357    if(++playbackIndex >= recBufSize) playbackIndex = 0;
358  
359    if(jumping) {
360      // A waveform-blending transition is in-progress
361      uint32_t w1 = 65536UL * jumpCount / jump, // ramp playbackIndexJumped up (14 bits)
362               w2 = 65536UL - w1;               // ramp playbackIndex down (14 bits)
363      nextOut = (recBuf[playbackIndexJumped] * w1 + recBuf[playbackIndex] * w2) >> 20; // 28 bit result->12 bits
364      if(++jumpCount >= jump) {
365        playbackIndex = playbackIndexJumped;
366        jumpCount     = 1;
367        jumping       = false;
368      } else {
369        if(++playbackIndexJumped >= recBufSize) playbackIndexJumped = 0;
370      }
371    } else {
372      nextOut = recBuf[playbackIndex] >> 4; // 16->12 bit
373      if(playbackRate >= sampleRate) { // Sped up
374        // Playback may overtake recording, need to back off periodically
375        int16_t dist = (recIndex >= playbackIndex) ?
376          (recIndex - playbackIndex) : (recBufSize - (playbackIndex - recIndex));
377        if(dist <= jumpThreshold) {
378          playbackIndexJumped = playbackIndex - jump;
379          if(playbackIndexJumped < 0) playbackIndexJumped += recBufSize;
380          jumping             = true;
381        }
382      } else { // Slowed down
383        // Playback may underflow recording, need to advance periodically
384        int16_t dist = (playbackIndex >= recIndex) ?
385          (playbackIndex - recIndex) : (recBufSize - (recIndex - playbackIndex));
386        if(dist <= jumpThreshold) {
387          playbackIndexJumped = (playbackIndex + jump) % recBufSize;
388          jumping             = true;
389        }
390      }
391    }
392  }