/ Drivers / CMSIS / DSP / Source / TransformFunctions / arm_rfft_fast_f32.c
arm_rfft_fast_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_rfft_fast_f32.c
  4   * Description:  RFFT & RIFFT Floating point process function
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/transform_functions.h"
 30  
 31  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 32  void stage_rfft_f32(
 33    const arm_rfft_fast_instance_f32 * S,
 34          float32_t * p,
 35          float32_t * pOut)
 36  {
 37          int32_t  k;                                /* Loop Counter */
 38          float32_t twR, twI;                         /* RFFT Twiddle coefficients */
 39    const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
 40          float32_t *pA = p;                          /* increasing pointer */
 41          float32_t *pB = p;                          /* decreasing pointer */
 42          float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
 43          float32_t t1a, t1b;                         /* temporary variables */
 44          float32_t p0, p1, p2, p3;                   /* temporary variables */
 45  
 46          float32x4x2_t tw,xA,xB;
 47          float32x4x2_t tmp1, tmp2, res;
 48  
 49          uint32x4_t     vecStridesFwd, vecStridesBkwd;
 50  
 51          vecStridesFwd = vidupq_u32((uint32_t)0, 2);
 52          vecStridesBkwd = -vecStridesFwd;
 53  
 54          int blockCnt;
 55  
 56  
 57     k = (S->Sint).fftLen - 1;
 58  
 59     /* Pack first and last sample of the frequency domain together */
 60  
 61     xBR = pB[0];
 62     xBI = pB[1];
 63     xAR = pA[0];
 64     xAI = pA[1];
 65  
 66     twR = *pCoeff++ ;
 67     twI = *pCoeff++ ;
 68  
 69     // U1 = XA(1) + XB(1); % It is real
 70     t1a = xBR + xAR  ;
 71  
 72     // U2 = XB(1) - XA(1); % It is imaginary
 73     t1b = xBI + xAI  ;
 74  
 75     // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
 76     // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
 77     *pOut++ = 0.5f * ( t1a + t1b );
 78     *pOut++ = 0.5f * ( t1a - t1b );
 79  
 80     // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
 81     pB  = p + 2*k;
 82     pA += 2;
 83  
 84     blockCnt = k >> 2;
 85     while (blockCnt > 0)
 86     {
 87        /*
 88           function X = my_split_rfft(X, ifftFlag)
 89           % X is a series of real numbers
 90           L  = length(X);
 91           XC = X(1:2:end) +i*X(2:2:end);
 92           XA = fft(XC);
 93           XB = conj(XA([1 end:-1:2]));
 94           TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
 95           for l = 2:L/2
 96              XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
 97           end
 98           XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
 99           X = XA;
100        */
101  
102  
103        xA = vld2q_f32(pA);
104        pA += 8;
105  
106        xB = vld2q_f32(pB);
107  
108        xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
109        xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
110  
111        xB.val[1] = vnegq_f32(xB.val[1]);
112        pB -= 8;
113  
114  
115        tw = vld2q_f32(pCoeff);
116        pCoeff += 8;
117  
118  
119        tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
120        tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
121  
122        tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
123        tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
124  
125        res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
126        res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
127  
128        res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
129        res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
130  
131        res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
132        res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
133  
134        res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
135        res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
136  
137  
138        vst2q_f32(pOut, res);
139        pOut += 8;
140  
141      
142        blockCnt--;
143     } 
144  
145     blockCnt = k & 3;
146     while (blockCnt > 0)
147     {
148        /*
149           function X = my_split_rfft(X, ifftFlag)
150           % X is a series of real numbers
151           L  = length(X);
152           XC = X(1:2:end) +i*X(2:2:end);
153           XA = fft(XC);
154           XB = conj(XA([1 end:-1:2]));
155           TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
156           for l = 2:L/2
157              XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
158           end
159           XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
160           X = XA;
161        */
162  
163        xBI = pB[1];
164        xBR = pB[0];
165        xAR = pA[0];
166        xAI = pA[1];
167  
168        twR = *pCoeff++;
169        twI = *pCoeff++;
170  
171        t1a = xBR - xAR ;
172        t1b = xBI + xAI ;
173  
174        // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
175        // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
176        p0 = twR * t1a;
177        p1 = twI * t1a;
178        p2 = twR * t1b;
179        p3 = twI * t1b;
180  
181        *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
182        *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
183  
184        pA += 2;
185        pB -= 2;
186        blockCnt--;
187     }
188  }
189  
190  /* Prepares data for inverse cfft */
191  void merge_rfft_f32(
192    const arm_rfft_fast_instance_f32 * S,
193          float32_t * p,
194          float32_t * pOut)
195  {
196          int32_t  k;                                /* Loop Counter */
197          float32_t twR, twI;                         /* RFFT Twiddle coefficients */
198    const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
199          float32_t *pA = p;                          /* increasing pointer */
200          float32_t *pB = p;                          /* decreasing pointer */
201          float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
202          float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
203  
204          float32x4x2_t tw,xA,xB;
205          float32x4x2_t tmp1, tmp2, res;
206          uint32x4_t     vecStridesFwd, vecStridesBkwd;
207  
208          vecStridesFwd = vidupq_u32((uint32_t)0, 2);
209          vecStridesBkwd = -vecStridesFwd;
210  
211          int blockCnt;
212          
213  
214     k = (S->Sint).fftLen - 1;
215  
216     xAR = pA[0];
217     xAI = pA[1];
218  
219     pCoeff += 2 ;
220  
221     *pOut++ = 0.5f * ( xAR + xAI );
222     *pOut++ = 0.5f * ( xAR - xAI );
223  
224     pB  =  p + 2*k ;
225     pA +=  2    ;
226  
227     blockCnt = k >> 2;
228     while (blockCnt > 0)
229     {
230        /* G is half of the frequency complex spectrum */
231        //for k = 2:N
232        //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
233        xA = vld2q_f32(pA);
234        pA += 8;
235  
236        xB = vld2q_f32(pB);
237  
238        xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd);
239        xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd);
240  
241        xB.val[1] = vnegq_f32(xB.val[1]);
242        pB -= 8;
243  
244  
245        tw = vld2q_f32(pCoeff);
246        tw.val[1] = vnegq_f32(tw.val[1]);
247        pCoeff += 8;
248  
249  
250        tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]);
251        tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]);
252  
253        tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]);
254        tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]);
255  
256        res.val[0] = vmulq(tw.val[0], tmp2.val[0]);
257        res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
258  
259        res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
260        res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
261  
262        res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] );
263        res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] );
264  
265        res.val[0] = vmulq_n_f32(res.val[0], 0.5f);
266        res.val[1] = vmulq_n_f32(res.val[1], 0.5f);
267  
268  
269        vst2q_f32(pOut, res);
270        pOut += 8;
271  
272      
273        blockCnt--;
274     }
275  
276     blockCnt = k & 3;
277     while (blockCnt > 0)
278     {
279        /* G is half of the frequency complex spectrum */
280        //for k = 2:N
281        //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
282        xBI =   pB[1]    ;
283        xBR =   pB[0]    ;
284        xAR =  pA[0];
285        xAI =  pA[1];
286  
287        twR = *pCoeff++;
288        twI = *pCoeff++;
289  
290        t1a = xAR - xBR ;
291        t1b = xAI + xBI ;
292  
293        r = twR * t1a;
294        s = twI * t1b;
295        t = twI * t1a;
296        u = twR * t1b;
297  
298        // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
299        // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
300        *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
301        *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
302  
303        pA += 2;
304        pB -= 2;
305        blockCnt--;
306     }
307  
308  }
309  #else
310  void stage_rfft_f32(
311    const arm_rfft_fast_instance_f32 * S,
312          float32_t * p,
313          float32_t * pOut)
314  {
315          int32_t  k;                                /* Loop Counter */
316          float32_t twR, twI;                         /* RFFT Twiddle coefficients */
317    const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
318          float32_t *pA = p;                          /* increasing pointer */
319          float32_t *pB = p;                          /* decreasing pointer */
320          float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
321          float32_t t1a, t1b;                         /* temporary variables */
322          float32_t p0, p1, p2, p3;                   /* temporary variables */
323  
324  
325     k = (S->Sint).fftLen - 1;
326  
327     /* Pack first and last sample of the frequency domain together */
328  
329     xBR = pB[0];
330     xBI = pB[1];
331     xAR = pA[0];
332     xAI = pA[1];
333  
334     twR = *pCoeff++ ;
335     twI = *pCoeff++ ;
336  
337  
338     // U1 = XA(1) + XB(1); % It is real
339     t1a = xBR + xAR  ;
340  
341     // U2 = XB(1) - XA(1); % It is imaginary
342     t1b = xBI + xAI  ;
343  
344     // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
345     // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
346     *pOut++ = 0.5f * ( t1a + t1b );
347     *pOut++ = 0.5f * ( t1a - t1b );
348  
349     // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
350     pB  = p + 2*k;
351     pA += 2;
352  
353     do
354     {
355        /*
356           function X = my_split_rfft(X, ifftFlag)
357           % X is a series of real numbers
358           L  = length(X);
359           XC = X(1:2:end) +i*X(2:2:end);
360           XA = fft(XC);
361           XB = conj(XA([1 end:-1:2]));
362           TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
363           for l = 2:L/2
364              XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
365           end
366           XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
367           X = XA;
368        */
369  
370        xBI = pB[1];
371        xBR = pB[0];
372        xAR = pA[0];
373        xAI = pA[1];
374  
375        twR = *pCoeff++;
376        twI = *pCoeff++;
377  
378        t1a = xBR - xAR ;
379        t1b = xBI + xAI ;
380  
381        // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
382        // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
383        p0 = twR * t1a;
384        p1 = twI * t1a;
385        p2 = twR * t1b;
386        p3 = twI * t1b;
387  
388        *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
389        *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
390  
391  
392        pA += 2;
393        pB -= 2;
394        k--;
395     } while (k > 0);
396  }
397  
398  /* Prepares data for inverse cfft */
399  void merge_rfft_f32(
400    const arm_rfft_fast_instance_f32 * S,
401          float32_t * p,
402          float32_t * pOut)
403  {
404          int32_t  k;                                /* Loop Counter */
405          float32_t twR, twI;                         /* RFFT Twiddle coefficients */
406    const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
407          float32_t *pA = p;                          /* increasing pointer */
408          float32_t *pB = p;                          /* decreasing pointer */
409          float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
410          float32_t t1a, t1b, r, s, t, u;             /* temporary variables */
411  
412     k = (S->Sint).fftLen - 1;
413  
414     xAR = pA[0];
415     xAI = pA[1];
416  
417     pCoeff += 2 ;
418  
419     *pOut++ = 0.5f * ( xAR + xAI );
420     *pOut++ = 0.5f * ( xAR - xAI );
421  
422     pB  =  p + 2*k ;
423     pA +=  2	   ;
424  
425     while (k > 0)
426     {
427        /* G is half of the frequency complex spectrum */
428        //for k = 2:N
429        //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
430        xBI =   pB[1]    ;
431        xBR =   pB[0]    ;
432        xAR =  pA[0];
433        xAI =  pA[1];
434  
435        twR = *pCoeff++;
436        twI = *pCoeff++;
437  
438        t1a = xAR - xBR ;
439        t1b = xAI + xBI ;
440  
441        r = twR * t1a;
442        s = twI * t1b;
443        t = twI * t1a;
444        u = twR * t1b;
445  
446        // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
447        // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
448        *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
449        *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
450  
451        pA += 2;
452        pB -= 2;
453        k--;
454     }
455  
456  }
457  
458  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
459  
460  /**
461    @ingroup groupTransforms
462  */
463  
464  /**
465    @defgroup RealFFT Real FFT Functions
466   
467    @par
468                     The CMSIS DSP library includes specialized algorithms for computing the
469                     FFT of real data sequences.  The FFT is defined over complex data but
470                     in many applications the input is real.  Real FFT algorithms take advantage
471                     of the symmetry properties of the FFT and have a speed advantage over complex
472                     algorithms of the same length.
473    @par
474                     The Fast RFFT algorithm relays on the mixed radix CFFT that save processor usage.
475    @par
476                     The real length N forward FFT of a sequence is computed using the steps shown below.
477    @par
478                     \image html RFFT.gif "Real Fast Fourier Transform"
479    @par
480                     The real sequence is initially treated as if it were complex to perform a CFFT.
481                     Later, a processing stage reshapes the data to obtain half of the frequency spectrum
482                     in complex format. Except the first complex number that contains the two real numbers
483                     X[0] and X[N/2] all the data is complex. In other words, the first complex sample
484                     contains two real values packed.
485    @par
486                     The input for the inverse RFFT should keep the same format as the output of the
487                     forward RFFT. A first processing stage pre-process the data to later perform an
488                     inverse CFFT.
489    @par
490                     \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
491    @par
492                     The algorithms for floating-point, Q15, and Q31 data are slightly different
493                     and we describe each algorithm in turn.
494    @par           Floating-point
495                     The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
496                     The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
497                     but are still documented.
498    @par
499                     The FFT of a real N-point sequence has even symmetry in the frequency domain. 
500                     The second half of the data equals the conjugate of the first half flipped in frequency. 
501                     Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
502                     These are packed into the output array in alternating real and imaginary components:
503    @par
504                     X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
505                     real[(N/2)-1], imag[(N/2)-1 }
506    @par
507                     It happens that the first complex number (real[0], imag[0]) is actually
508                     all real. real[0] represents the DC offset, and imag[0] should be 0.
509                     (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
510                     the first harmonic and so on.
511    @par
512                     The real FFT functions pack the frequency domain data in this fashion.
513                     The forward transform outputs the data in this form and the inverse
514                     transform expects input data in this form. The function always performs
515                     the needed bitreversal so that the input and output data is always in
516                     normal order. The functions support lengths of [32, 64, 128, ..., 4096]
517                     samples.
518    @par           Q15 and Q31
519                     The real algorithms are defined in a similar manner and utilize N/2 complex
520                     transforms behind the scenes.
521    @par
522                     The complex transforms used internally include scaling to prevent fixed-point
523                     overflows.  The overall scaling equals 1/(fftLen/2).
524                     Due to the use of complex transform internally, the source buffer is
525                     modified by the rfft.
526    @par
527                     A separate instance structure must be defined for each transform used but
528                     twiddle factor and bit reversal tables can be reused.
529    @par
530                     There is also an associated initialization function for each data type.
531                     The initialization function performs the following operations:
532                      - Sets the values of the internal structure fields.
533                      - Initializes twiddle factor table and bit reversal table pointers.
534                      - Initializes the internal complex FFT data structure.
535    @par
536                     Use of the initialization function is optional **except for MVE versions where it is mandatory**.
537                     If you don't use the initialization functions, then the structures should be initialized with code
538                     similar to the one below:
539    <pre>
540        arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
541        arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
542    </pre>
543                     where <code>fftLenReal</code> is the length of the real transform;
544                     <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
545                     <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
546                     <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
547                     output (=1).
548                     <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
549                     The value is based on the FFT length;
550                     <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
551                     <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
552                     <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
553                     must also be initialized.  
554  @par
555                     Note that with MVE versions you can't initialize instance structures directly and **must
556                     use the initialization function**.
557   */
558  
559  /**
560    @addtogroup RealFFT
561    @{
562  */
563  
564  /**
565    @brief         Processing function for the floating-point real FFT.
566    @param[in]     S         points to an arm_rfft_fast_instance_f32 structure
567    @param[in]     p         points to input buffer (Source buffer is modified by this function.)
568    @param[in]     pOut      points to output buffer
569    @param[in]     ifftFlag
570                     - value = 0: RFFT
571                     - value = 1: RIFFT
572    @return        none
573  */
574  
575  void arm_rfft_fast_f32(
576    const arm_rfft_fast_instance_f32 * S,
577    float32_t * p,
578    float32_t * pOut,
579    uint8_t ifftFlag)
580  {
581     const arm_cfft_instance_f32 * Sint = &(S->Sint);
582  
583     /* Calculation of Real FFT */
584     if (ifftFlag)
585     {
586        /*  Real FFT compression */
587        merge_rfft_f32(S, p, pOut);
588        /* Complex radix-4 IFFT process */
589        arm_cfft_f32( Sint, pOut, ifftFlag, 1);
590     }
591     else
592     {
593        /* Calculation of RFFT of input */
594        arm_cfft_f32( Sint, p, ifftFlag, 1);
595  
596        /*  Real FFT extraction */
597        stage_rfft_f32(S, p, pOut);
598     }
599  }
600  
601  /**
602  * @} end of RealFFT group
603  */