/ Drivers / CMSIS / DSP / Source / TransformFunctions / arm_cfft_radix4_f32.c
arm_cfft_radix4_f32.c
   1  /* ----------------------------------------------------------------------
   2   * Project:      CMSIS DSP Library
   3   * Title:        arm_cfft_radix4_f32.c
   4   * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
   5   *
   6   * $Date:        23 April 2021
   7   * $Revision:    V1.9.0
   8   *
   9   * Target Processor: Cortex-M and Cortex-A cores
  10   * -------------------------------------------------------------------- */
  11  /*
  12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13   *
  14   * SPDX-License-Identifier: Apache-2.0
  15   *
  16   * Licensed under the Apache License, Version 2.0 (the License); you may
  17   * not use this file except in compliance with the License.
  18   * You may obtain a copy of the License at
  19   *
  20   * www.apache.org/licenses/LICENSE-2.0
  21   *
  22   * Unless required by applicable law or agreed to in writing, software
  23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25   * See the License for the specific language governing permissions and
  26   * limitations under the License.
  27   */
  28  
  29  #include "dsp/transform_functions.h"
  30  
  31  extern void arm_bitreversal_f32(
  32          float32_t * pSrc,
  33          uint16_t fftSize,
  34          uint16_t bitRevFactor,
  35    const uint16_t * pBitRevTab);
  36  
  37  void arm_radix4_butterfly_f32(
  38          float32_t * pSrc,
  39          uint16_t fftLen,
  40    const float32_t * pCoef,
  41          uint16_t twidCoefModifier);
  42  
  43  void arm_radix4_butterfly_inverse_f32(
  44          float32_t * pSrc,
  45          uint16_t fftLen,
  46    const float32_t * pCoef,
  47          uint16_t twidCoefModifier,
  48          float32_t onebyfftLen);
  49  
  50  
  51  
  52  
  53  /**
  54    @ingroup groupTransforms
  55   */
  56  
  57  /**
  58    @addtogroup ComplexFFT
  59    @{
  60   */
  61  
  62  
  63  /**
  64    @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
  65    @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
  66    @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  67    @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  68    @return        none
  69   */
  70  
  71  void arm_cfft_radix4_f32(
  72    const arm_cfft_radix4_instance_f32 * S,
  73          float32_t * pSrc)
  74  {
  75     if (S->ifftFlag == 1U)
  76     {
  77        /*  Complex IFFT radix-4  */
  78        arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  79     }
  80     else
  81     {
  82        /*  Complex FFT radix-4  */
  83        arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  84     }
  85  
  86     if (S->bitReverseFlag == 1U)
  87     {
  88        /*  Bit Reversal */
  89        arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  90     }
  91  
  92  }
  93  
  94  /**
  95    @} end of ComplexFFT group
  96   */
  97  
  98  /* ----------------------------------------------------------------------
  99   * Internal helper function used by the FFTs
 100   * ---------------------------------------------------------------------- */
 101  
 102  /**
 103    brief         Core function for the floating-point forward CFFT radix-4 butterfly process; every stage is computed in place in pSrc.
 104    param[in,out] pSrc             points to the in-place buffer of floating-point data type; read and written as interleaved pairs pSrc[2*i] (x) and pSrc[2*i + 1] (y)
 105    param[in]     fftLen           length of the FFT, in complex points. The do/while loops below always run at least once, so very small lengths (e.g. 0) are not guarded against here; presumably the caller guarantees a supported radix-4 length - TODO confirm
 106    param[in]     pCoef            points to the twiddle coefficient buffer; read as pairs pCoef[2*ia] (co) and pCoef[2*ia + 1] (si)
 107    param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table; it is shifted left by 2 (x4) after each stage that uses twiddles
 108    return        none
 109   */
 110  
 111  void arm_radix4_butterfly_f32(
 112          float32_t * pSrc,
 113          uint16_t fftLen,
 114    const float32_t * pCoef,
 115          uint16_t twidCoefModifier)
 116  {
 117          float32_t co1, co2, co3, si1, si2, si3;
 118          uint32_t ia1, ia2, ia3;
 119          uint32_t i0, i1, i2, i3;
 120          uint32_t n1, n2, j, k;
 121  
 122  #if defined (ARM_MATH_LOOPUNROLL)
 123  
 124          float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
 125          float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
 126          Ybminusd;
 127          float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
 128          float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
 129          float32_t *ptr1;
 130          float32_t p0,p1,p2,p3,p4,p5;
 131          float32_t a0,a1,a2,a3,a4,a5,a6,a7;
 132  
 133     /*  Initializations for the first stage */
 134     n2 = fftLen;
 135     n1 = n2;   /* NOTE(review): never read; n1 is reassigned at the top of the middle-stage loop before its only use */
 136  
 137     /* n2 = fftLen/4 */
 138     n2 >>= 2U;
 139     i0 = 0U;
 140     ia1 = 0U;
 141  
 142     j = n2;
 143  
 144     /*  Calculation of first stage: fftLen/4 butterflies, one twiddle triple (ia1, 2*ia1, 3*ia1) per butterfly */
 145     do
 146     {
 147        /*  index calculation for the input as, */
 148        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 149        i1 = i0 + n2;
 150        i2 = i1 + n2;
 151        i3 = i2 + n2;
 152  
 153        xaIn = pSrc[(2U * i0)];
 154        yaIn = pSrc[(2U * i0) + 1U];
 155  
 156        xbIn = pSrc[(2U * i1)];
 157        ybIn = pSrc[(2U * i1) + 1U];
 158  
 159        xcIn = pSrc[(2U * i2)];
 160        ycIn = pSrc[(2U * i2) + 1U];
 161  
 162        xdIn = pSrc[(2U * i3)];
 163        ydIn = pSrc[(2U * i3) + 1U];
 164  
 165        /* xa + xc */
 166        Xaplusc = xaIn + xcIn;
 167        /* xb + xd */
 168        Xbplusd = xbIn + xdIn;
 169        /* ya + yc */
 170        Yaplusc = yaIn + ycIn;
 171        /* yb + yd */
 172        Ybplusd = ybIn + ydIn;
 173  
 174        /*  index calculation for the coefficients */
 175        ia2 = ia1 + ia1;
 176        co2 = pCoef[ia2 * 2U];
 177        si2 = pCoef[(ia2 * 2U) + 1U];
 178  
 179        /* xa - xc */
 180        Xaminusc = xaIn - xcIn;
 181        /* xb - xd */
 182        Xbminusd = xbIn - xdIn;
 183        /* ya - yc */
 184        Yaminusc = yaIn - ycIn;
 185        /* yb - yd */
 186        Ybminusd = ybIn - ydIn;
 187  
 188        /* xa' = xa + xb + xc + xd */
 189        pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 190        /* ya' = ya + yb + yc + yd */
 191        pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 192  
 193        /* (xa - xc) + (yb - yd) */
 194        Xb12C_out = (Xaminusc + Ybminusd);
 195        /* (ya - yc) - (xb - xd) */
 196        Yb12C_out = (Yaminusc - Xbminusd);
 197        /* (xa + xc) - (xb + xd) */
 198        Xc12C_out = (Xaplusc - Xbplusd);
 199        /* (ya + yc) - (yb + yd) */
 200        Yc12C_out = (Yaplusc - Ybplusd);
 201        /* (xa - xc) - (yb - yd) */
 202        Xd12C_out = (Xaminusc - Ybminusd);
 203        /* (ya - yc) + (xb - xd) */
 204        Yd12C_out = (Xbminusd + Yaminusc);
 205  
 206        co1 = pCoef[ia1 * 2U];
 207        si1 = pCoef[(ia1 * 2U) + 1U];
 208  
 209        /*  index calculation for the coefficients */
 210        ia3 = ia2 + ia1;
 211        co3 = pCoef[ia3 * 2U];
 212        si3 = pCoef[(ia3 * 2U) + 1U];
 213  
 214        Xb12_out = Xb12C_out * co1;
 215        Yb12_out = Yb12C_out * co1;
 216        Xc12_out = Xc12C_out * co2;
 217        Yc12_out = Yc12C_out * co2;
 218        Xd12_out = Xd12C_out * co3;
 219        Yd12_out = Yd12C_out * co3;
 220  
 221        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 222        // p0 is the si1 term of xb'; it is added to Xb12_out below
 223        p0 = Yb12C_out * si1;
 224        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 225        // p1 is the si1 term of yb'; it is subtracted from Yb12_out below
 226        p1 = Xb12C_out * si1;
 227        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 228        // p2 is the si2 term of xc'; it is added to Xc12_out below
 229        p2 = Yc12C_out * si2;
 230        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 231        // p3 is the si2 term of yc'; it is subtracted from Yc12_out below
 232        p3 = Xc12C_out * si2;
 233        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 234        // p4 is the si3 term of xd'; it is added to Xd12_out below
 235        p4 = Yd12C_out * si3;
 236        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 237        // p5 is the si3 term of yd'; it is subtracted from Yd12_out below
 238        p5 = Xd12C_out * si3;
 239  
 240        Xb12_out += p0;
 241        Yb12_out -= p1;
 242        Xc12_out += p2;
 243        Yc12_out -= p3;
 244        Xd12_out += p4;
 245        Yd12_out -= p5;
 246  
 247        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 248        pSrc[2U * i1] = Xc12_out;
 249  
 250        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 251        pSrc[(2U * i1) + 1U] = Yc12_out;
 252  
 253        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 254        pSrc[2U * i2] = Xb12_out;
 255  
 256        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 257        pSrc[(2U * i2) + 1U] = Yb12_out;
 258  
 259        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 260        pSrc[2U * i3] = Xd12_out;
 261  
 262        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 263        pSrc[(2U * i3) + 1U] = Yd12_out;
 264  
 265        /*  Twiddle coefficients index modifier */
 266        ia1 += twidCoefModifier;
 267  
 268        /*  Updating input index */
 269        i0++;
 270  
 271     }
 272     while (--j);
 273  
 274     twidCoefModifier <<= 2U;
 275  
 276     /*  Calculation of the second stage up to, but excluding, the last stage */
 277     for (k = fftLen >> 2U; k > 4U; k >>= 2U)
 278     {
 279        /*  Initializations for the current stage */
 280        n1 = n2;
 281        n2 >>= 2U;
 282        ia1 = 0U;
 283  
 284        /*  Calculation of the current stage: j selects the twiddle triple, the inner loop walks all groups n1 apart */
 285        j = 0;
 286        do
 287        {
 288           /*  index calculation for the coefficients */
 289           ia2 = ia1 + ia1;
 290           ia3 = ia2 + ia1;
 291           co1 = pCoef[(ia1 * 2U)];
 292           si1 = pCoef[(ia1 * 2U) + 1U];
 293           co2 = pCoef[(ia2 * 2U)];
 294           si2 = pCoef[(ia2 * 2U) + 1U];
 295           co3 = pCoef[(ia3 * 2U)];
 296           si3 = pCoef[(ia3 * 2U) + 1U];
 297  
 298           /*  Twiddle coefficients index modifier */
 299           ia1 += twidCoefModifier;
 300  
 301           i0 = j;
 302           do
 303           {
 304              /*  index calculation for the input as, */
 305              /*  pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] (complex indices) */
 306              i1 = i0 + n2;
 307              i2 = i1 + n2;
 308              i3 = i2 + n2;
 309  
 310              xaIn = pSrc[(2U * i0)];
 311              yaIn = pSrc[(2U * i0) + 1U];
 312  
 313              xbIn = pSrc[(2U * i1)];
 314              ybIn = pSrc[(2U * i1) + 1U];
 315  
 316              xcIn = pSrc[(2U * i2)];
 317              ycIn = pSrc[(2U * i2) + 1U];
 318  
 319              xdIn = pSrc[(2U * i3)];
 320              ydIn = pSrc[(2U * i3) + 1U];
 321  
 322              /* xa - xc */
 323              Xaminusc = xaIn - xcIn;
 324              /* (xb - xd) */
 325              Xbminusd = xbIn - xdIn;
 326              /* ya - yc */
 327              Yaminusc = yaIn - ycIn;
 328              /* (yb - yd) */
 329              Ybminusd = ybIn - ydIn;
 330  
 331              /* xa + xc */
 332              Xaplusc = xaIn + xcIn;
 333              /* xb + xd */
 334              Xbplusd = xbIn + xdIn;
 335              /* ya + yc */
 336              Yaplusc = yaIn + ycIn;
 337              /* yb + yd */
 338              Ybplusd = ybIn + ydIn;
 339  
 340              /* (xa - xc) + (yb - yd) */
 341              Xb12C_out = (Xaminusc + Ybminusd);
 342              /* (ya - yc) -  (xb - xd) */
 343              Yb12C_out = (Yaminusc - Xbminusd);
 344              /* xa + xc -(xb + xd) */
 345              Xc12C_out = (Xaplusc - Xbplusd);
 346              /* (ya + yc) - (yb + yd) */
 347              Yc12C_out = (Yaplusc - Ybplusd);
 348              /* (xa - xc) - (yb - yd) */
 349              Xd12C_out = (Xaminusc - Ybminusd);
 350              /* (ya - yc) +  (xb - xd) */
 351              Yd12C_out = (Xbminusd + Yaminusc);
 352  
 353              pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 354              pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 355  
 356              Xb12_out = Xb12C_out * co1;
 357              Yb12_out = Yb12C_out * co1;
 358              Xc12_out = Xc12C_out * co2;
 359              Yc12_out = Yc12C_out * co2;
 360              Xd12_out = Xd12C_out * co3;
 361              Yd12_out = Yd12C_out * co3;
 362  
 363              /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 364              // p0 is the si1 term of xb'; it is added to Xb12_out below
 365              p0 = Yb12C_out * si1;
 366              /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 367              // p1 is the si1 term of yb'; it is subtracted from Yb12_out below
 368              p1 = Xb12C_out * si1;
 369              /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 370              // p2 is the si2 term of xc'; it is added to Xc12_out below
 371              p2 = Yc12C_out * si2;
 372              /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 373              // p3 is the si2 term of yc'; it is subtracted from Yc12_out below
 374              p3 = Xc12C_out * si2;
 375              /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 376              // p4 is the si3 term of xd'; it is added to Xd12_out below
 377              p4 = Yd12C_out * si3;
 378              /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 379              // p5 is the si3 term of yd'; it is subtracted from Yd12_out below
 380              p5 = Xd12C_out * si3;
 381  
 382              Xb12_out += p0;
 383              Yb12_out -= p1;
 384              Xc12_out += p2;
 385              Yc12_out -= p3;
 386              Xd12_out += p4;
 387              Yd12_out -= p5;
 388  
 389              /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 390              pSrc[2U * i1] = Xc12_out;
 391  
 392              /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 393              pSrc[(2U * i1) + 1U] = Yc12_out;
 394  
 395              /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 396              pSrc[2U * i2] = Xb12_out;
 397  
 398              /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 399              pSrc[(2U * i2) + 1U] = Yb12_out;
 400  
 401              /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 402              pSrc[2U * i3] = Xd12_out;
 403  
 404              /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 405              pSrc[(2U * i3) + 1U] = Yd12_out;
 406  
 407              i0 += n1;
 408           } while (i0 < fftLen);
 409           j++;
 410        } while (j <= (n2 - 1U));
 411        twidCoefModifier <<= 2U;
 412     }
 413  
 414     j = fftLen >> 2;
 415     ptr1 = &pSrc[0];
 416  
 417     /*  Calculations of last stage: fftLen/4 butterflies, each on 4 adjacent complex samples, with no twiddle multiplication */
 418     do
 419     {
 420        xaIn = ptr1[0];
 421        yaIn = ptr1[1];
 422        xbIn = ptr1[2];
 423        ybIn = ptr1[3];
 424        xcIn = ptr1[4];
 425        ycIn = ptr1[5];
 426        xdIn = ptr1[6];
 427        ydIn = ptr1[7];
 428  
 429        /* xa + xc */
 430        Xaplusc = xaIn + xcIn;
 431  
 432        /* xa - xc */
 433        Xaminusc = xaIn - xcIn;
 434  
 435        /* ya + yc */
 436        Yaplusc = yaIn + ycIn;
 437  
 438        /* ya - yc */
 439        Yaminusc = yaIn - ycIn;
 440  
 441        /* xb + xd */
 442        Xbplusd = xbIn + xdIn;
 443  
 444        /* yb + yd */
 445        Ybplusd = ybIn + ydIn;
 446  
 447        /* (xb-xd) */
 448        Xbminusd = xbIn - xdIn;
 449  
 450        /* (yb-yd) */
 451        Ybminusd = ybIn - ydIn;
 452  
 453        /* xa' = xa + xb + xc + xd */
 454        a0 = (Xaplusc + Xbplusd);
 455        /* ya' = ya + yb + yc + yd */
 456        a1 = (Yaplusc + Ybplusd);
 457        /* xc' = (xa-xb+xc-xd) */
 458        a2 = (Xaplusc - Xbplusd);
 459        /* yc' = (ya-yb+yc-yd) */
 460        a3 = (Yaplusc - Ybplusd);
 461        /* xb' = (xa+yb-xc-yd) */
 462        a4 = (Xaminusc + Ybminusd);
 463        /* yb' = (ya-xb-yc+xd) */
 464        a5 = (Yaminusc - Xbminusd);
 465        /* xd' = (xa-yb-xc+yd) */
 466        a6 = (Xaminusc - Ybminusd);
 467        /* yd' = (ya+xb-yc-xd) */
 468        a7 = (Xbminusd + Yaminusc);
 469  
 470        ptr1[0] = a0;
 471        ptr1[1] = a1;
 472        ptr1[2] = a2;
 473        ptr1[3] = a3;
 474        ptr1[4] = a4;
 475        ptr1[5] = a5;
 476        ptr1[6] = a6;
 477        ptr1[7] = a7;
 478  
 479        /* increment pointer by 8 */
 480        ptr1 += 8U;
 481     } while (--j);
 482  
 483  #else
 484  
 485          float32_t t1, t2, r1, r2, s1, s2;
 486  
 487     /* Initializations for the fft calculation; in this path every stage, including the last, runs through the same loop */
 488     n2 = fftLen;
 489     n1 = n2;   /* NOTE(review): never read; n1 is reassigned at the top of the stage loop before its only use */
 490     for (k = fftLen; k > 1U; k >>= 2U)
 491     {
 492        /*  Initializations for the fft calculation */
 493        n1 = n2;
 494        n2 >>= 2U;
 495        ia1 = 0U;
 496  
 497        /*  FFT Calculation */
 498        j = 0;
 499        do
 500        {
 501           /*  index calculation for the coefficients */
 502           ia2 = ia1 + ia1;
 503           ia3 = ia2 + ia1;
 504           co1 = pCoef[ia1 * 2U];
 505           si1 = pCoef[(ia1 * 2U) + 1U];
 506           co2 = pCoef[ia2 * 2U];
 507           si2 = pCoef[(ia2 * 2U) + 1U];
 508           co3 = pCoef[ia3 * 2U];
 509           si3 = pCoef[(ia3 * 2U) + 1U];
 510  
 511           /*  Twiddle coefficients index modifier */
 512           ia1 = ia1 + twidCoefModifier;
 513  
 514           i0 = j;
 515           do
 516           {
 517              /*  index calculation for the input as, */
 518              /*  pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] (complex indices) */
 519              i1 = i0 + n2;
 520              i2 = i1 + n2;
 521              i3 = i2 + n2;
 522  
 523              /* xa + xc */
 524              r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
 525  
 526              /* xa - xc */
 527              r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
 528  
 529              /* ya + yc */
 530              s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
 531  
 532              /* ya - yc */
 533              s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
 534  
 535              /* xb + xd */
 536              t1 = pSrc[2U * i1] + pSrc[2U * i3];
 537  
 538              /* xa' = xa + xb + xc + xd */
 539              pSrc[2U * i0] = r1 + t1;
 540  
 541              /* xa + xc -(xb + xd) */
 542              r1 = r1 - t1;
 543  
 544              /* yb + yd */
 545              t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
 546  
 547              /* ya' = ya + yb + yc + yd */
 548              pSrc[(2U * i0) + 1U] = s1 + t2;
 549  
 550              /* (ya + yc) - (yb + yd) */
 551              s1 = s1 - t2;
 552  
 553              /* (yb - yd) */
 554              t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
 555  
 556              /* (xb - xd) */
 557              t2 = pSrc[2U * i1] - pSrc[2U * i3];
 558  
 559              /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
 560              pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
 561  
 562              /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
 563              pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
 564  
 565              /* (xa - xc) + (yb - yd) */
 566              r1 = r2 + t1;
 567  
 568              /* (xa - xc) - (yb - yd) */
 569              r2 = r2 - t1;
 570  
 571              /* (ya - yc) -  (xb - xd) */
 572              s1 = s2 - t2;
 573  
 574              /* (ya - yc) +  (xb - xd) */
 575              s2 = s2 + t2;
 576  
 577              /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
 578              pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
 579  
 580              /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
 581              pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
 582  
 583              /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
 584              pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
 585  
 586              /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
 587              pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
 588  
 589              i0 += n1;
 590           } while ( i0 < fftLen);
 591           j++;
 592        } while (j <= (n2 - 1U));
 593        twidCoefModifier <<= 2U;
 594     }
 595  
 596  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
 597  
 598  }
 599  
 600  /**
 601    brief         Core function for the floating-point CIFFT butterfly process.
 602    param[in,out] pSrc             points to the in-place buffer of floating-point data type
 603    param[in]     fftLen           length of the FFT
 604    param[in]     pCoef            points to twiddle coefficient buffer
 605    param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 606    param[in]     onebyfftLen      value of 1/fftLen
 607    return        none
 608   */
 609  
 610  void arm_radix4_butterfly_inverse_f32(
 611          float32_t * pSrc,
 612          uint16_t fftLen,
 613    const float32_t * pCoef,
 614          uint16_t twidCoefModifier,
 615          float32_t onebyfftLen)
 616  {
 617          float32_t co1, co2, co3, si1, si2, si3;
 618          uint32_t ia1, ia2, ia3;
 619          uint32_t i0, i1, i2, i3;
 620          uint32_t n1, n2, j, k;
 621  
 622  #if defined (ARM_MATH_LOOPUNROLL)
 623  
 624          float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
 625          float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
 626          Ybminusd;
 627          float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
 628          float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
 629          float32_t *ptr1;
 630          float32_t p0,p1,p2,p3,p4,p5,p6,p7;
 631          float32_t a0,a1,a2,a3,a4,a5,a6,a7;
 632  
 633  
 634     /*  Initializations for the first stage */
 635     n2 = fftLen;
 636     n1 = n2;
 637  
 638     /* n2 = fftLen/4 */
 639     n2 >>= 2U;
 640     i0 = 0U;
 641     ia1 = 0U;
 642  
 643     j = n2;
 644  
 645     /*  Calculation of first stage */
 646     do
 647     {
 648        /*  index calculation for the input as, */
 649        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 650        i1 = i0 + n2;
 651        i2 = i1 + n2;
 652        i3 = i2 + n2;
 653  
 654        /*  Butterfly implementation */
 655        xaIn = pSrc[(2U * i0)];
 656        yaIn = pSrc[(2U * i0) + 1U];
 657  
 658        xcIn = pSrc[(2U * i2)];
 659        ycIn = pSrc[(2U * i2) + 1U];
 660  
 661        xbIn = pSrc[(2U * i1)];
 662        ybIn = pSrc[(2U * i1) + 1U];
 663  
 664        xdIn = pSrc[(2U * i3)];
 665        ydIn = pSrc[(2U * i3) + 1U];
 666  
 667        /* xa + xc */
 668        Xaplusc = xaIn + xcIn;
 669        /* xb + xd */
 670        Xbplusd = xbIn + xdIn;
 671        /* ya + yc */
 672        Yaplusc = yaIn + ycIn;
 673        /* yb + yd */
 674        Ybplusd = ybIn + ydIn;
 675  
 676        /*  index calculation for the coefficients */
 677        ia2 = ia1 + ia1;
 678        co2 = pCoef[ia2 * 2U];
 679        si2 = pCoef[(ia2 * 2U) + 1U];
 680  
 681        /* xa - xc */
 682        Xaminusc = xaIn - xcIn;
 683        /* xb - xd */
 684        Xbminusd = xbIn - xdIn;
 685        /* ya - yc */
 686        Yaminusc = yaIn - ycIn;
 687        /* yb - yd */
 688        Ybminusd = ybIn - ydIn;
 689  
 690        /* xa' = xa + xb + xc + xd */
 691        pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 692  
 693        /* ya' = ya + yb + yc + yd */
 694        pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 695  
 696        /* (xa - xc) - (yb - yd) */
 697        Xb12C_out = (Xaminusc - Ybminusd);
 698        /* (ya - yc) + (xb - xd) */
 699        Yb12C_out = (Yaminusc + Xbminusd);
 700        /* (xa + xc) - (xb + xd) */
 701        Xc12C_out = (Xaplusc - Xbplusd);
 702        /* (ya + yc) - (yb + yd) */
 703        Yc12C_out = (Yaplusc - Ybplusd);
 704        /* (xa - xc) + (yb - yd) */
 705        Xd12C_out = (Xaminusc + Ybminusd);
 706        /* (ya - yc) - (xb - xd) */
 707        Yd12C_out = (Yaminusc - Xbminusd);
 708  
 709        co1 = pCoef[ia1 * 2U];
 710        si1 = pCoef[(ia1 * 2U) + 1U];
 711  
 712        /*  index calculation for the coefficients */
 713        ia3 = ia2 + ia1;
 714        co3 = pCoef[ia3 * 2U];
 715        si3 = pCoef[(ia3 * 2U) + 1U];
 716  
 717        Xb12_out = Xb12C_out * co1;
 718        Yb12_out = Yb12C_out * co1;
 719        Xc12_out = Xc12C_out * co2;
 720        Yc12_out = Yc12C_out * co2;
 721        Xd12_out = Xd12C_out * co3;
 722        Yd12_out = Yd12C_out * co3;
 723  
 724        /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 725        //Xb12_out -= Yb12C_out * si1;
 726        p0 = Yb12C_out * si1;
 727        /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 728        //Yb12_out += Xb12C_out * si1;
 729        p1 = Xb12C_out * si1;
 730        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 731        //Xc12_out -= Yc12C_out * si2;
 732        p2 = Yc12C_out * si2;
 733        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 734        //Yc12_out += Xc12C_out * si2;
 735        p3 = Xc12C_out * si2;
 736        /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 737        //Xd12_out -= Yd12C_out * si3;
 738        p4 = Yd12C_out * si3;
 739        /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 740        //Yd12_out += Xd12C_out * si3;
 741        p5 = Xd12C_out * si3;
 742  
 743        Xb12_out -= p0;
 744        Yb12_out += p1;
 745        Xc12_out -= p2;
 746        Yc12_out += p3;
 747        Xd12_out -= p4;
 748        Yd12_out += p5;
 749  
 750        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 751        pSrc[2U * i1] = Xc12_out;
 752  
 753        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 754        pSrc[(2U * i1) + 1U] = Yc12_out;
 755  
 756        /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 757        pSrc[2U * i2] = Xb12_out;
 758  
 759        /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 760        pSrc[(2U * i2) + 1U] = Yb12_out;
 761  
 762        /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 763        pSrc[2U * i3] = Xd12_out;
 764  
 765        /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 766        pSrc[(2U * i3) + 1U] = Yd12_out;
 767  
 768        /*  Twiddle coefficients index modifier */
 769        ia1 = ia1 + twidCoefModifier;
 770  
 771        /*  Updating input index */
 772        i0 = i0 + 1U;
 773  
 774     } while (--j);
 775  
 776     twidCoefModifier <<= 2U;
 777  
 778     /*  Calculation of second stage to excluding last stage */
 779     for (k = fftLen >> 2U; k > 4U; k >>= 2U)
 780     {
 781        /*  Initializations for the first stage */
 782        n1 = n2;
 783        n2 >>= 2U;
 784        ia1 = 0U;
 785  
 786        /*  Calculation of first stage */
 787        j = 0;
 788        do
 789        {
 790           /*  index calculation for the coefficients */
 791           ia2 = ia1 + ia1;
 792           ia3 = ia2 + ia1;
 793           co1 = pCoef[ia1 * 2U];
 794           si1 = pCoef[(ia1 * 2U) + 1U];
 795           co2 = pCoef[ia2 * 2U];
 796           si2 = pCoef[(ia2 * 2U) + 1U];
 797           co3 = pCoef[ia3 * 2U];
 798           si3 = pCoef[(ia3 * 2U) + 1U];
 799  
 800           /*  Twiddle coefficients index modifier */
 801           ia1 = ia1 + twidCoefModifier;
 802  
 803           i0 = j;
 804           do
 805           {
 806              /*  index calculation for the input as, */
 807              /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
 808              i1 = i0 + n2;
 809              i2 = i1 + n2;
 810              i3 = i2 + n2;
 811  
 812              xaIn = pSrc[(2U * i0)];
 813              yaIn = pSrc[(2U * i0) + 1U];
 814  
 815              xbIn = pSrc[(2U * i1)];
 816              ybIn = pSrc[(2U * i1) + 1U];
 817  
 818              xcIn = pSrc[(2U * i2)];
 819              ycIn = pSrc[(2U * i2) + 1U];
 820  
 821              xdIn = pSrc[(2U * i3)];
 822              ydIn = pSrc[(2U * i3) + 1U];
 823  
 824              /* xa - xc */
 825              Xaminusc = xaIn - xcIn;
 826              /* (xb - xd) */
 827              Xbminusd = xbIn - xdIn;
 828              /* ya - yc */
 829              Yaminusc = yaIn - ycIn;
 830              /* (yb - yd) */
 831              Ybminusd = ybIn - ydIn;
 832  
 833              /* xa + xc */
 834              Xaplusc = xaIn + xcIn;
 835              /* xb + xd */
 836              Xbplusd = xbIn + xdIn;
 837              /* ya + yc */
 838              Yaplusc = yaIn + ycIn;
 839              /* yb + yd */
 840              Ybplusd = ybIn + ydIn;
 841  
 842              /* (xa - xc) - (yb - yd) */
 843              Xb12C_out = (Xaminusc - Ybminusd);
 844              /* (ya - yc) +  (xb - xd) */
 845              Yb12C_out = (Yaminusc + Xbminusd);
 846              /* xa + xc -(xb + xd) */
 847              Xc12C_out = (Xaplusc - Xbplusd);
 848              /* (ya + yc) - (yb + yd) */
 849              Yc12C_out = (Yaplusc - Ybplusd);
 850              /* (xa - xc) + (yb - yd) */
 851              Xd12C_out = (Xaminusc + Ybminusd);
 852              /* (ya - yc) -  (xb - xd) */
 853              Yd12C_out = (Yaminusc - Xbminusd);
 854  
 855              pSrc[(2U * i0)] = Xaplusc + Xbplusd;
 856              pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
 857  
 858              Xb12_out = Xb12C_out * co1;
 859              Yb12_out = Yb12C_out * co1;
 860              Xc12_out = Xc12C_out * co2;
 861              Yc12_out = Yc12C_out * co2;
 862              Xd12_out = Xd12C_out * co3;
 863              Yd12_out = Yd12C_out * co3;
 864  
 865              /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 866              //Xb12_out -= Yb12C_out * si1;
 867              p0 = Yb12C_out * si1;
 868              /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 869              //Yb12_out += Xb12C_out * si1;
 870              p1 = Xb12C_out * si1;
 871              /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 872              //Xc12_out -= Yc12C_out * si2;
 873              p2 = Yc12C_out * si2;
 874              /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 875              //Yc12_out += Xc12C_out * si2;
 876              p3 = Xc12C_out * si2;
 877              /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 878              //Xd12_out -= Yd12C_out * si3;
 879              p4 = Yd12C_out * si3;
 880              /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 881              //Yd12_out += Xd12C_out * si3;
 882              p5 = Xd12C_out * si3;
 883  
 884              Xb12_out -= p0;
 885              Yb12_out += p1;
 886              Xc12_out -= p2;
 887              Yc12_out += p3;
 888              Xd12_out -= p4;
 889              Yd12_out += p5;
 890  
 891              /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
 892              pSrc[2U * i1] = Xc12_out;
 893  
 894              /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
 895              pSrc[(2U * i1) + 1U] = Yc12_out;
 896  
 897              /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
 898              pSrc[2U * i2] = Xb12_out;
 899  
 900              /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
 901              pSrc[(2U * i2) + 1U] = Yb12_out;
 902  
 903              /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
 904              pSrc[2U * i3] = Xd12_out;
 905  
 906              /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
 907              pSrc[(2U * i3) + 1U] = Yd12_out;
 908  
 909              i0 += n1;
 910           } while (i0 < fftLen);
 911           j++;
 912        } while (j <= (n2 - 1U));
 913        twidCoefModifier <<= 2U;
 914     }
 915     /*  Initializations of last stage */
 916  
 917     j = fftLen >> 2;
 918     ptr1 = &pSrc[0];
 919  
 920     /*  Calculations of last stage */
 921     do
 922     {
 923        xaIn = ptr1[0];
 924        yaIn = ptr1[1];
 925        xbIn = ptr1[2];
 926        ybIn = ptr1[3];
 927        xcIn = ptr1[4];
 928        ycIn = ptr1[5];
 929        xdIn = ptr1[6];
 930        ydIn = ptr1[7];
 931  
 932        /*  Butterfly implementation */
 933        /* xa + xc */
 934        Xaplusc = xaIn + xcIn;
 935  
 936        /* xa - xc */
 937        Xaminusc = xaIn - xcIn;
 938  
 939        /* ya + yc */
 940        Yaplusc = yaIn + ycIn;
 941  
 942        /* ya - yc */
 943        Yaminusc = yaIn - ycIn;
 944  
 945        /* xb + xd */
 946        Xbplusd = xbIn + xdIn;
 947  
 948        /* yb + yd */
 949        Ybplusd = ybIn + ydIn;
 950  
 951        /* (xb-xd) */
 952        Xbminusd = xbIn - xdIn;
 953  
 954        /* (yb-yd) */
 955        Ybminusd = ybIn - ydIn;
 956  
 957        /* xa' = (xa+xb+xc+xd) * onebyfftLen */
 958        a0 = (Xaplusc + Xbplusd);
 959        /* ya' = (ya+yb+yc+yd) * onebyfftLen */
 960        a1 = (Yaplusc + Ybplusd);
 961        /* xc' = (xa-xb+xc-xd) * onebyfftLen */
 962        a2 = (Xaplusc - Xbplusd);
 963        /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
 964        a3 = (Yaplusc - Ybplusd);
 965        /* xb' = (xa-yb-xc+yd) * onebyfftLen */
 966        a4 = (Xaminusc - Ybminusd);
 967        /* yb' = (ya+xb-yc-xd) * onebyfftLen */
 968        a5 = (Yaminusc + Xbminusd);
 969        /* xd' = (xa+yb-xc-yd) * onebyfftLen */
 970        a6 = (Xaminusc + Ybminusd);
 971        /* yd' = (ya-xb-yc+xd) * onebyfftLen */
 972        a7 = (Yaminusc - Xbminusd);
 973  
 974        p0 = a0 * onebyfftLen;
 975        p1 = a1 * onebyfftLen;
 976        p2 = a2 * onebyfftLen;
 977        p3 = a3 * onebyfftLen;
 978        p4 = a4 * onebyfftLen;
 979        p5 = a5 * onebyfftLen;
 980        p6 = a6 * onebyfftLen;
 981        p7 = a7 * onebyfftLen;
 982  
 983        /* xa' = (xa+xb+xc+xd) * onebyfftLen */
 984        ptr1[0] = p0;
 985        /* ya' = (ya+yb+yc+yd) * onebyfftLen */
 986        ptr1[1] = p1;
 987        /* xc' = (xa-xb+xc-xd) * onebyfftLen */
 988        ptr1[2] = p2;
 989        /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
 990        ptr1[3] = p3;
 991        /* xb' = (xa-yb-xc+yd) * onebyfftLen */
 992        ptr1[4] = p4;
 993        /* yb' = (ya+xb-yc-xd) * onebyfftLen */
 994        ptr1[5] = p5;
 995        /* xd' = (xa+yb-xc-yd) * onebyfftLen */
 996        ptr1[6] = p6;
 997        /* yd' = (ya-xb-yc+xd) * onebyfftLen */
 998        ptr1[7] = p7;
 999  
1000        /* increment source pointer by 8 for next calculations */
1001        ptr1 = ptr1 + 8U;
1002  
1003     } while (--j);
1004  
1005  #else
1006  
1007          float32_t t1, t2, r1, r2, s1, s2;
1008  
1009     /*  Initializations for the first stage */
1010     n2 = fftLen;
1011     n1 = n2;
1012  
1013     /*  Calculation of first stage */
1014     for (k = fftLen; k > 4U; k >>= 2U)
1015     {
1016        /*  Initializations for the first stage */
1017        n1 = n2;
1018        n2 >>= 2U;
1019        ia1 = 0U;
1020  
1021        /*  Calculation of first stage */
1022        j = 0;
1023        do
1024        {
1025           /*  index calculation for the coefficients */
1026           ia2 = ia1 + ia1;
1027           ia3 = ia2 + ia1;
1028           co1 = pCoef[ia1 * 2U];
1029           si1 = pCoef[(ia1 * 2U) + 1U];
1030           co2 = pCoef[ia2 * 2U];
1031           si2 = pCoef[(ia2 * 2U) + 1U];
1032           co3 = pCoef[ia3 * 2U];
1033           si3 = pCoef[(ia3 * 2U) + 1U];
1034  
1035           /*  Twiddle coefficients index modifier */
1036           ia1 = ia1 + twidCoefModifier;
1037  
1038           i0 = j;
1039           do
1040           {
1041              /*  index calculation for the input as, */
1042              /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1043              i1 = i0 + n2;
1044              i2 = i1 + n2;
1045              i3 = i2 + n2;
1046  
1047              /* xa + xc */
1048              r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
1049  
1050              /* xa - xc */
1051              r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
1052  
1053              /* ya + yc */
1054              s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1055  
1056              /* ya - yc */
1057              s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1058  
1059              /* xb + xd */
1060              t1 = pSrc[2U * i1] + pSrc[2U * i3];
1061  
1062              /* xa' = xa + xb + xc + xd */
1063              pSrc[2U * i0] = r1 + t1;
1064  
1065              /* xa + xc -(xb + xd) */
1066              r1 = r1 - t1;
1067  
1068              /* yb + yd */
1069              t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1070  
1071              /* ya' = ya + yb + yc + yd */
1072              pSrc[(2U * i0) + 1U] = s1 + t2;
1073  
1074              /* (ya + yc) - (yb + yd) */
1075              s1 = s1 - t2;
1076  
1077              /* (yb - yd) */
1078              t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1079  
1080              /* (xb - xd) */
1081              t2 = pSrc[2U * i1] - pSrc[2U * i3];
1082  
1083              /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1084              pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
1085  
1086              /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1087              pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
1088  
1089              /* (xa - xc) - (yb - yd) */
1090              r1 = r2 - t1;
1091  
1092              /* (xa - xc) + (yb - yd) */
1093              r2 = r2 + t1;
1094  
1095              /* (ya - yc) +  (xb - xd) */
1096              s1 = s2 + t2;
1097  
1098              /* (ya - yc) -  (xb - xd) */
1099              s2 = s2 - t2;
1100  
1101              /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
1102              pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
1103  
1104              /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
1105              pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
1106  
1107              /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
1108              pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
1109  
1110              /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
1111              pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
1112  
1113              i0 += n1;
1114           } while ( i0 < fftLen);
1115           j++;
1116        } while (j <= (n2 - 1U));
1117        twidCoefModifier <<= 2U;
1118     }
1119     /*  Initializations of last stage */
1120     n1 = n2;
1121     n2 >>= 2U;
1122  
1123     /*  Calculations of last stage */
1124     for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
1125     {
1126        /*  index calculation for the input as, */
1127        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
1128        i1 = i0 + n2;
1129        i2 = i1 + n2;
1130        i3 = i2 + n2;
1131  
1132        /*  Butterfly implementation */
1133        /* xa + xc */
1134        r1 = pSrc[2U * i0] + pSrc[2U * i2];
1135  
1136        /* xa - xc */
1137        r2 = pSrc[2U * i0] - pSrc[2U * i2];
1138  
1139        /* ya + yc */
1140        s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
1141  
1142        /* ya - yc */
1143        s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
1144  
1145        /* xb + xd */
1146        t1 = pSrc[2U * i1] + pSrc[2U * i3];
1147  
1148        /* xa' = xa + xb + xc + xd */
1149        pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
1150  
1151        /* (xa + xc) - (xb + xd) */
1152        r1 = r1 - t1;
1153  
1154        /* yb + yd */
1155        t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
1156  
1157        /* ya' = ya + yb + yc + yd */
1158        pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
1159  
1160        /* (ya + yc) - (yb + yd) */
1161        s1 = s1 - t2;
1162  
1163        /* (yb-yd) */
1164        t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
1165  
1166        /* (xb-xd) */
1167        t2 = pSrc[2U * i1] - pSrc[2U * i3];
1168  
1169        /* xc' = (xa-xb+xc-xd) * onebyfftLen */
1170        pSrc[2U * i1] = r1 * onebyfftLen;
1171  
1172        /* yc' = (ya-yb+yc-yd) * onebyfftLen */
1173        pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
1174  
1175        /* (xa - xc) - (yb-yd) */
1176        r1 = r2 - t1;
1177  
1178        /* (xa - xc) + (yb-yd) */
1179        r2 = r2 + t1;
1180  
1181        /* (ya - yc) + (xb-xd) */
1182        s1 = s2 + t2;
1183  
1184        /* (ya - yc) - (xb-xd) */
1185        s2 = s2 - t2;
1186  
1187        /* xb' = (xa-yb-xc+yd) * onebyfftLen */
1188        pSrc[2U * i2] = r1 * onebyfftLen;
1189  
1190        /* yb' = (ya+xb-yc-xd) * onebyfftLen */
1191        pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
1192  
1193        /* xd' = (xa+yb-xc-yd) * onebyfftLen */
1194        pSrc[2U * i3] = r2 * onebyfftLen;
1195  
1196        /* yd' = (ya-xb-yc+xd) * onebyfftLen */
1197        pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
1198     }
1199  
1200  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
1201  }
1202  
1203