arm_cfft_radix2_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cfft_radix2_f16.c 4 * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/transform_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 void arm_radix2_butterfly_f16( 34 float16_t * pSrc, 35 uint32_t fftLen, 36 const float16_t * pCoef, 37 uint16_t twidCoefModifier); 38 39 void arm_radix2_butterfly_inverse_f16( 40 float16_t * pSrc, 41 uint32_t fftLen, 42 const float16_t * pCoef, 43 uint16_t twidCoefModifier, 44 float16_t onebyfftLen); 45 46 extern void arm_bitreversal_f16( 47 float16_t * pSrc, 48 uint16_t fftSize, 49 uint16_t bitRevFactor, 50 const uint16_t * pBitRevTab); 51 52 /** 53 @ingroup groupTransforms 54 */ 55 56 /** 57 @addtogroup ComplexFFT 58 @{ 59 */ 60 61 /** 62 @brief Radix-2 CFFT/CIFFT. 63 @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future 64 @param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure 65 @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place 66 @return none 67 */ 68 69 void arm_cfft_radix2_f16( 70 const arm_cfft_radix2_instance_f16 * S, 71 float16_t * pSrc) 72 { 73 74 if (S->ifftFlag == 1U) 75 { 76 /* Complex IFFT radix-2 */ 77 arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, 78 S->twidCoefModifier, S->onebyfftLen); 79 } 80 else 81 { 82 /* Complex FFT radix-2 */ 83 arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, 84 S->twidCoefModifier); 85 } 86 87 if (S->bitReverseFlag == 1U) 88 { 89 /* Bit Reversal */ 90 arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 91 } 92 93 } 94 95 96 /** 97 @} end of ComplexFFT group 98 */ 99 100 101 102 /* ---------------------------------------------------------------------- 103 ** Internal helper function used by the FFTs 104 ** ------------------------------------------------------------------- */ 105 106 /* 107 * @brief Core function for the floating-point CFFT butterfly process. 108 * @param[in, out] *pSrc points to the in-place buffer of floating-point data type. 109 * @param[in] fftLen length of the FFT. 110 * @param[in] *pCoef points to the twiddle coefficient buffer. 111 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 112 * @return none. 113 */ 114 115 void arm_radix2_butterfly_f16( 116 float16_t * pSrc, 117 uint32_t fftLen, 118 const float16_t * pCoef, 119 uint16_t twidCoefModifier) 120 { 121 122 uint32_t i, j, k, l; 123 uint32_t n1, n2, ia; 124 float16_t xt, yt, cosVal, sinVal; 125 float16_t p0, p1, p2, p3; 126 float16_t a0, a1; 127 128 #if defined (ARM_MATH_DSP) 129 130 /* Initializations for the first stage */ 131 n2 = fftLen >> 1; 132 ia = 0; 133 i = 0; 134 135 // loop for groups 136 for (k = n2; k > 0; k--) 137 { 138 cosVal = pCoef[ia * 2]; 139 sinVal = pCoef[(ia * 2) + 1]; 140 141 /* Twiddle coefficients index modifier */ 142 ia += twidCoefModifier; 143 144 /* index calculation for the input as, */ 145 /* pSrc[i + 0], pSrc[i + fftLen/1] */ 146 l = i + n2; 147 148 /* Butterfly implementation */ 149 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 150 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 151 152 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 153 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 154 155 p0 = (_Float16)xt * (_Float16)cosVal; 156 p1 = (_Float16)yt * (_Float16)sinVal; 157 p2 = (_Float16)yt * (_Float16)cosVal; 158 p3 = (_Float16)xt * (_Float16)sinVal; 159 160 pSrc[2 * i] = a0; 161 pSrc[2 * i + 1] = a1; 162 163 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1; 164 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3; 165 166 i++; 167 } // groups loop end 168 169 twidCoefModifier <<= 1U; 170 171 // loop for stage 172 for (k = n2; k > 2; k = k >> 1) 173 { 174 n1 = n2; 175 n2 = n2 >> 1; 176 ia = 0; 177 178 // loop for groups 179 j = 0; 180 do 181 { 182 cosVal = pCoef[ia * 2]; 183 sinVal = pCoef[(ia * 2) + 1]; 184 ia += twidCoefModifier; 185 186 // loop for butterfly 187 i = j; 188 do 189 { 190 l = i + n2; 191 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 192 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 193 194 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 195 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 196 197 p0 = (_Float16)xt * (_Float16)cosVal; 198 p1 = (_Float16)yt * (_Float16)sinVal; 199 p2 = (_Float16)yt * (_Float16)cosVal; 200 p3 = (_Float16)xt * (_Float16)sinVal; 201 202 pSrc[2 * i] = a0; 203 pSrc[2 * i + 1] = a1; 204 205 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1; 206 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3; 207 208 i += n1; 209 } while ( i < fftLen ); // butterfly loop end 210 j++; 211 } while ( j < n2); // groups loop end 212 twidCoefModifier <<= 1U; 213 } // stages loop end 214 215 // loop for butterfly 216 for (i = 0; i < fftLen; i += 2) 217 { 218 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2]; 219 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2]; 220 221 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3]; 222 a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1]; 223 224 pSrc[2 * i] = a0; 225 pSrc[2 * i + 1] = a1; 226 pSrc[2 * i + 2] = xt; 227 pSrc[2 * i + 3] = yt; 228 } // groups loop end 229 230 #else 231 232 n2 = fftLen; 233 234 // loop for stage 235 for (k = fftLen; k > 1; k = k >> 1) 236 { 237 n1 = n2; 238 n2 = n2 >> 1; 239 ia = 0; 240 241 // loop for groups 242 j = 0; 243 do 244 { 245 cosVal = pCoef[ia * 2]; 246 sinVal = pCoef[(ia * 2) + 1]; 247 ia += twidCoefModifier; 248 249 // loop for butterfly 250 i = j; 251 do 252 { 253 l = i + n2; 254 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 255 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 256 257 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 258 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 259 260 p0 = (_Float16)xt * (_Float16)cosVal; 261 p1 = (_Float16)yt * (_Float16)sinVal; 262 p2 = (_Float16)yt * (_Float16)cosVal; 263 p3 = (_Float16)xt * (_Float16)sinVal; 264 265 pSrc[2 * i] = a0; 266 pSrc[2 * i + 1] = a1; 267 268 pSrc[2 * l] = (_Float16)p0 + (_Float16)p1; 269 pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3; 270 271 i += n1; 272 } while (i < fftLen); 273 j++; 274 } while (j < n2); 275 twidCoefModifier <<= 1U; 276 } 277 278 #endif // #if defined (ARM_MATH_DSP) 279 280 } 281 282 283 void arm_radix2_butterfly_inverse_f16( 284 float16_t * pSrc, 285 uint32_t fftLen, 286 const float16_t * pCoef, 287 uint16_t twidCoefModifier, 288 float16_t onebyfftLen) 289 { 290 291 uint32_t i, j, k, l; 292 uint32_t n1, n2, ia; 293 float16_t xt, yt, cosVal, sinVal; 294 float16_t p0, p1, p2, p3; 295 float16_t a0, a1; 296 297 #if defined (ARM_MATH_DSP) 298 299 n2 = fftLen >> 1; 300 ia = 0; 301 302 // loop for groups 303 for (i = 0; i < n2; i++) 304 { 305 cosVal = pCoef[ia * 2]; 306 sinVal = pCoef[(ia * 2) + 1]; 307 ia += twidCoefModifier; 308 309 l = i + n2; 310 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 311 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 312 313 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 314 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 315 316 p0 = (_Float16)xt * (_Float16)cosVal; 317 p1 = (_Float16)yt * (_Float16)sinVal; 318 p2 = (_Float16)yt * (_Float16)cosVal; 319 p3 = (_Float16)xt * (_Float16)sinVal; 320 321 pSrc[2 * i] = a0; 322 pSrc[2 * i + 1] = a1; 323 324 pSrc[2 * l] = (_Float16)p0 - (_Float16)p1; 325 pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3; 326 } // groups loop end 327 328 twidCoefModifier <<= 1U; 329 330 // loop for stage 331 for (k = fftLen / 2; k > 2; k = k >> 1) 332 { 333 n1 = n2; 334 n2 = n2 >> 1; 335 ia = 0; 336 337 // loop for groups 338 j = 0; 339 do 340 { 341 cosVal = pCoef[ia * 2]; 342 sinVal = pCoef[(ia * 2) + 1]; 343 ia += twidCoefModifier; 344 345 // loop for butterfly 346 i = j; 347 do 348 { 349 l = i + n2; 350 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 351 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 352 353 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 354 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 355 356 p0 = (_Float16)xt * (_Float16)cosVal; 357 p1 = (_Float16)yt * (_Float16)sinVal; 358 p2 = (_Float16)yt * (_Float16)cosVal; 359 p3 = (_Float16)xt * (_Float16)sinVal; 360 361 pSrc[2 * i] = a0; 362 pSrc[2 * i + 1] = a1; 363 364 pSrc[2 * l] = (_Float16)p0 - (_Float16)p1; 365 pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3; 366 367 i += n1; 368 } while ( i < fftLen ); // butterfly loop end 369 j++; 370 } while (j < n2); // groups loop end 371 372 twidCoefModifier <<= 1U; 373 } // stages loop end 374 375 // loop for butterfly 376 for (i = 0; i < fftLen; i += 2) 377 { 378 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2]; 379 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2]; 380 381 a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1]; 382 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3]; 383 384 p0 = (_Float16)a0 * (_Float16)onebyfftLen; 385 p2 = (_Float16)xt * (_Float16)onebyfftLen; 386 p1 = (_Float16)a1 * (_Float16)onebyfftLen; 387 p3 = (_Float16)yt * (_Float16)onebyfftLen; 388 389 pSrc[2 * i] = p0; 390 pSrc[2 * i + 1] = p1; 391 pSrc[2 * i + 2] = p2; 392 pSrc[2 * i + 3] = p3; 393 } // butterfly loop end 394 395 #else 396 397 n2 = fftLen; 398 399 // loop for stage 400 for (k = fftLen; k > 2; k = k >> 1) 401 { 402 n1 = n2; 403 n2 = n2 >> 1; 404 ia = 0; 405 406 // loop for groups 407 j = 0; 408 do 409 { 410 cosVal = pCoef[ia * 2]; 411 sinVal = pCoef[(ia * 2) + 1]; 412 ia = ia + twidCoefModifier; 413 414 // loop for butterfly 415 i = j; 416 do 417 { 418 l = i + n2; 419 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 420 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 421 422 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 423 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 424 425 p0 = (_Float16)xt * (_Float16)cosVal; 426 p1 = (_Float16)yt * (_Float16)sinVal; 427 p2 = (_Float16)yt * (_Float16)cosVal; 428 p3 = (_Float16)xt * (_Float16)sinVal; 429 430 pSrc[2 * i] = a0; 431 pSrc[2 * i + 1] = a1; 432 433 pSrc[2 * l] = (_Float16)p0 - (_Float16)p1; 434 pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3; 435 436 i += n1; 437 } while ( i < fftLen ); // butterfly loop end 438 j++; 439 } while ( j < n2 ); // groups loop end 440 441 twidCoefModifier = twidCoefModifier << 1U; 442 } // stages loop end 443 444 n1 = n2; 445 n2 = n2 >> 1; 446 447 // loop for butterfly 448 for (i = 0; i < fftLen; i += n1) 449 { 450 l = i + n2; 451 452 a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l]; 453 xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l]; 454 455 a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1]; 456 yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1]; 457 458 p0 = (_Float16)a0 * (_Float16)onebyfftLen; 459 p2 = (_Float16)xt * (_Float16)onebyfftLen; 460 p1 = (_Float16)a1 * (_Float16)onebyfftLen; 461 p3 = (_Float16)yt * (_Float16)onebyfftLen; 462 463 pSrc[2 * i] = p0; 464 pSrc[2U * l] = p2; 465 466 pSrc[2 * i + 1] = p1; 467 pSrc[2U * l + 1U] = p3; 468 } // butterfly loop end 469 470 #endif // #if defined (ARM_MATH_DSP) 471 472 } 473 474 475 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */