pdmvoice.cpp
// SPDX-FileCopyrightText: 2019 Phillip Burgess for Adafruit Industries
//
// SPDX-License-Identifier: MIT

// Basic voice changer code. This version is specific to the Adafruit
// MONSTER M4SK board using a PDM microphone.

#if defined(ADAFRUIT_MONSTER_M4SK_EXPRESS)

#include "globals.h"
#include <SPI.h>
#include <Adafruit_ZeroPDMSPI.h>

// Voice pitch bounds and "typical" pitch, in Hz. MIN_PITCH_HZ sizes the
// recording buffer and TYP_PITCH_HZ sizes the seam-jump distance (both
// below).
#define MIN_PITCH_HZ 65
#define MAX_PITCH_HZ 1600
#define TYP_PITCH_HZ 175

static void voiceOutCallback(void);
// Playback rate (Hz) computed by voicePitch() from the integer timer
// period; voiceMod() uses it to size the modulation table.
static float actualPlaybackRate;

// PDM mic allows 1.0 to 3.25 MHz max clock (2.4 typical).
// SPI native max is 24 MHz, so available speeds are 12, 6, 3 MHz.
#define SPI_BITRATE 3000000
// 3 MHz / 32 bits = 93,750 Hz interrupt frequency
// 2 interrupts/sample = 46,875 Hz audio sample rate
const float sampleRate = (float)SPI_BITRATE / 64.0;
// sampleRate is float in case factors change to make it not divide evenly.
// It DOES NOT CHANGE over time, only playbackRate does.

// Although SPI lib now has an option to get an SPI object's SERCOM number
// at run time, the interrupt handler MUST be declared at compile time...
// so it's necessary to know the SERCOM # ahead of time anyway, oh well.
#define PDM_SPI SPI2 // PDM mic SPI peripheral
#define PDM_SERCOM_HANDLER SERCOM3_0_Handler

Adafruit_ZeroPDMSPI pdmspi(&PDM_SPI);

// Compared against sampleRate in voiceOutCallback() to choose between the
// "sped up" and "slowed down" seam handling.
static float playbackRate = sampleRate;
// Circular recording buffer of 16-bit mic samples; NULL until allocated.
static uint16_t *recBuf = NULL;
// recBuf currently gets allocated (in voiceSetup()) for two full cycles of
// the lowest pitch we're likely to encounter. Right now it doesn't really
// NEED to be this size, but if pitch detection is added in the future then
// this'll become more useful.
// 46,875 sampling rate from mic, 65 Hz lowest pitch -> 1442 samples
// (2884 bytes).
static const uint16_t recBufSize = (uint16_t)(sampleRate / (float)MIN_PITCH_HZ * 2.0 + 0.5);
static int16_t recIndex = 0;      // Last-written position in recBuf (mic interrupt)
static int16_t playbackIndex = 0; // Current read position in recBuf (playback callback)

// Updated by the mic interrupt for use by outside code: most recent sample
// and running min/max. 32768 is the initial/reset value for all three.
volatile uint16_t voiceLastReading = 32768;
volatile uint16_t voiceMin = 32768;
volatile uint16_t voiceMax = 32768;

#define MOD_MIN 20 // Lowest supported modulation frequency (lower = more RAM use)
static uint8_t modWave = 0; // Modulation wave type (0=none, 1=square, 2=sine, 3=triangle, 4=sawtooth)
static uint8_t *modBuf = NULL; // Modulation waveform buffer
static uint32_t modIndex = 0; // Current position in modBuf
static uint32_t modLen = 0; // Currently used amount of modBuf based on modFreq

// Just playing back directly from the recording circular buffer produces
// audible clicks as the waveforms rarely align at the beginning and end of
// the buffer. So what we do is advance or push back the playback index a
// certain amount when it's likely to overtake or underflow the recording
// index, and interpolate from the current to the jumped-forward-or-back
// readings over a short period. In a perfect world, that "certain amount"
// would be one wavelength of the current voice pitch...BUT...with no pitch
// detection currently, we instead use a fixed middle-of-the-road value:
// TYP_PITCH_HZ, 175 by default, which is a bit below typical female spoken
// vocal range and a bit above typical male spoken range. This all goes out
// the window with singing, and of course young people will have a higher
// speech range; this is just a crude catch-all approximation.
71 static const uint16_t jump = (int)(sampleRate / (float)TYP_PITCH_HZ + 0.5); 72 static const uint16_t interp = jump / 4; // Interp time = 1/4 waveform 73 static bool jumping = false; 74 static uint16_t jumpCount = 1; 75 static int16_t jumpThreshold; 76 static int16_t playbackIndexJumped; 77 static uint16_t nextOut = 2048; 78 79 float voicePitch(float p); 80 81 // START PITCH SHIFT (no arguments) ---------------------------------------- 82 83 bool voiceSetup(bool modEnable) { 84 85 // Allocate circular buffer for audio 86 if(NULL == (recBuf = (uint16_t *)malloc(recBufSize * sizeof(uint16_t)))) { 87 return false; // Fail 88 } 89 90 // Allocate buffer for voice modulation, if enabled 91 if(modEnable) { 92 // 250 comes from min period in voicePitch() 93 modBuf = (uint8_t *)malloc((int)(48000000.0 / 250.0 / MOD_MIN + 0.5)); 94 // If malloc fails, program will continue without modulation 95 } 96 97 pdmspi.begin(sampleRate); // Set up PDM microphone 98 analogWriteResolution(12); // Set up analog output 99 voicePitch(1.0); // Set timer interval 100 101 return true; // Success 102 } 103 104 // SET PITCH --------------------------------------------------------------- 105 106 // Set pitch adjustment, higher numbers = higher pitch. 0 < pitch < inf 107 // 0.5 = halve frequency (1 octave down) 108 // 1.0 = normal playback 109 // 2.0 = double frequency (1 octave up) 110 // Available pitch adjustment range depends on various hardware factors 111 // (SPI speed, timer/counter resolution, etc.), and the actual pitch 112 // adjustment (after appying constraints) will be returned. 
113 float voicePitch(float p) { 114 float desiredPlaybackRate = sampleRate * p; 115 // Clip to sensible range 116 if(desiredPlaybackRate < 19200) desiredPlaybackRate = 19200; // ~0.41X 117 else if(desiredPlaybackRate > 192000) desiredPlaybackRate = 192000; // ~4.1X 118 arcada.timerCallback(desiredPlaybackRate, voiceOutCallback); 119 // Making this assumption here knowing Arcada will use 1:1 prescale: 120 int32_t period = (int32_t)(48000000.0 / desiredPlaybackRate); 121 actualPlaybackRate = 48000000.0 / (float)period; 122 p = (actualPlaybackRate / sampleRate); // New pitch 123 jumpThreshold = (int)(jump * p + 0.5); 124 return p; 125 } 126 127 // SET GAIN ---------------------------------------------------------------- 128 129 void voiceGain(float g) { 130 pdmspi.setMicGain(g); // Handles its own clipping 131 } 132 133 // SET MODULATION ---------------------------------------------------------- 134 135 // This needs to be called after any call to voicePitch() -- the modulation 136 // table is not currently auto-regenerated. Maybe that'll change. 
137 138 void voiceMod(uint32_t freq, uint8_t waveform) { 139 if(modBuf) { // Ignore if no modulation buffer allocated 140 if(freq < MOD_MIN) freq = MOD_MIN; 141 modLen = (uint32_t)(actualPlaybackRate / freq + 0.5); 142 if(modLen < 2) modLen = 2; 143 if(waveform > 4) waveform = 4; 144 modWave = waveform; 145 yield(); 146 switch(waveform) { 147 case 0: // None 148 break; 149 case 1: // Square 150 memset(modBuf, 255, modLen / 2); 151 memset(&modBuf[modLen / 2], 0, modLen - modLen / 2); 152 break; 153 case 2: // Sine 154 for(uint32_t i=0; i<modLen; i++) { 155 modBuf[i] = (int)((sin(M_PI * 2.0 * (float)i / (float)modLen) + 1.0) * 0.5 * 255.0 + 0.5); 156 } 157 break; 158 case 3: // Triangle 159 for(uint32_t i=0; i<modLen; i++) { 160 modBuf[i] = (int)(fabs(0.5 - (float)i / (float)modLen) * 2.0 * 255.0 + 0.5); 161 } 162 break; 163 case 4: // Sawtooth (increasing) 164 for(uint32_t i=0; i<modLen; i++) { 165 modBuf[i] = (int)((float)i / (float)(modLen - 1) * 255.0 + 0.5); 166 } 167 break; 168 } 169 } 170 } 171 172 // INTERRUPT HANDLERS ------------------------------------------------------ 173 174 void PDM_SERCOM_HANDLER(void) { 175 uint16_t micReading = 0; 176 if(pdmspi.decimateFilterWord(&micReading, true)) { 177 // So, the theory is, in the future some basic pitch detection could be 178 // added right about here, which could be used to improve the seam 179 // transitions in the playback interrupt (and possibly other things, 180 // like dynamic adjustment of the playback rate to do monotone and other 181 // effects). Actual usable pitch detection on speech turns out to be One 182 // Of Those Nearly Insurmountable Problems In Audio Processing...if 183 // you're thinking "oh just count the zero crossings" "just use an FFT" 184 // it's really not that simple, trust me, please, I've been reading 185 // everything on this, speech waveforms are jerks. 
Had the beginnings of 186 // some "maybe good enough approximation for a hacky microcontroller 187 // project" code here, but it's pulled out for now for the sake of 188 // getting something not-broken in folks' hands in a sensible timeframe. 189 if(++recIndex >= recBufSize) recIndex = 0; 190 recBuf[recIndex] = micReading; 191 192 // Outside code can use the value of voiceLastReading if you want to 193 // do an approximate live waveform display, or dynamic gain adjustment 194 // based on mic input, or other stuff. This won't give you every single 195 // sample in the recording buffer one-by-one sequentially...it's just 196 // the last thing that was stored prior to whatever time you polled it, 197 // but may still have some uses. 198 voiceLastReading = micReading; 199 200 // Similarly, user code can extern these variables and monitor the 201 // peak-to-peak range. They are never reset in the voice code itself, 202 // it's the duty of the user code to reset both to 32768 periodically. 203 if(micReading < voiceMin) voiceMin = micReading; 204 else if(micReading > voiceMax) voiceMax = micReading; 205 } 206 } 207 208 static void voiceOutCallback(void) { 209 210 // Modulation is done on the output (rather than the input) because 211 // pitch-shifting modulated input would cause weird waveform 212 // discontinuities. This does require recalculating the modulation table 213 // any time the pitch changes though. 214 if(modWave) { 215 nextOut = (((int32_t)nextOut - 2048) * (modBuf[modIndex] + 1) / 256) + 2048; 216 if(++modIndex >= modLen) modIndex = 0; 217 } 218 219 // Do analog writes pronto so output timing is consistent 220 analogWrite(A0, nextOut); 221 analogWrite(A1, nextOut); 222 // Then we can take whatever variable time for processing the next cycle... 
223 224 if(++playbackIndex >= recBufSize) playbackIndex = 0; 225 226 if(jumping) { 227 // A waveform-blending transition is in-progress 228 uint32_t w1 = 65536UL * jumpCount / jump, // ramp playbackIndexJumped up (14 bits) 229 w2 = 65536UL - w1; // ramp playbackIndex down (14 bits) 230 nextOut = (recBuf[playbackIndexJumped] * w1 + recBuf[playbackIndex] * w2) >> 20; // 28 bit result->12 bits 231 if(++jumpCount >= jump) { 232 playbackIndex = playbackIndexJumped; 233 jumpCount = 1; 234 jumping = false; 235 } else { 236 if(++playbackIndexJumped >= recBufSize) playbackIndexJumped = 0; 237 } 238 } else { 239 nextOut = recBuf[playbackIndex] >> 4; // 16->12 bit 240 if(playbackRate >= sampleRate) { // Sped up 241 // Playback may overtake recording, need to back off periodically 242 int16_t dist = (recIndex >= playbackIndex) ? 243 (recIndex - playbackIndex) : (recBufSize - (playbackIndex - recIndex)); 244 if(dist <= jumpThreshold) { 245 playbackIndexJumped = playbackIndex - jump; 246 if(playbackIndexJumped < 0) playbackIndexJumped += recBufSize; 247 jumping = true; 248 } 249 } else { // Slowed down 250 // Playback may underflow recording, need to advance periodically 251 int16_t dist = (playbackIndex >= recIndex) ? 252 (playbackIndex - recIndex) : (recBufSize - 1 - (recIndex - playbackIndex)); 253 if(dist <= jumpThreshold) { 254 playbackIndexJumped = (playbackIndex + jump) % recBufSize; 255 jumping = true; 256 } 257 } 258 } 259 } 260 261 #endif // ADAFRUIT_MONSTER_M4SK_EXPRESS