/ Drivers / CMSIS / DSP / Source / TransformFunctions / arm_cfft_f64.c
arm_cfft_f64.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cfft_f64.c
  4   * Description:  Combined Radix Decimation in Frequency CFFT Double Precision Floating point processing function
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/transform_functions.h"
 30  #include "arm_common_tables.h"
 31  
 32  
 33  extern void arm_radix4_butterfly_f64(
 34          float64_t * pSrc,
 35          uint16_t fftLen,
 36    const float64_t * pCoef,
 37          uint16_t twidCoefModifier);
 38  
 39  extern void arm_bitreversal_64(
 40          uint64_t * pSrc,
 41    const uint16_t   bitRevLen,
 42    const uint16_t * pBitRevTable);
 43  
 44  /**
 45  * @} end of ComplexFFT group
 46  */
 47  
 48  /* ----------------------------------------------------------------------
 49   * Internal helper function used by the FFTs
 50   * ---------------------------------------------------------------------- */
 51  
 52  /*
 53  * @brief  Core function for the Double Precision floating-point CFFT butterfly process.
 54  * @param[in, out] *pSrc            points to the in-place buffer of F64 data type.
 55  * @param[in]      fftLen           length of the FFT.
 56  * @param[in]      *pCoef           points to the twiddle coefficient buffer.
 57  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
 58  * @return none.
 59  */
 60  
 61  void arm_radix4_butterfly_f64(
 62          float64_t * pSrc,
 63          uint16_t fftLen,
 64    const float64_t * pCoef,
 65          uint16_t twidCoefModifier)
 66  {
 67  
 68     float64_t co1, co2, co3, si1, si2, si3;
 69     uint32_t ia1, ia2, ia3;
 70     uint32_t i0, i1, i2, i3;
 71     uint32_t n1, n2, j, k;
 72  
 73     float64_t t1, t2, r1, r2, s1, s2;
 74  
 75  
 76     /*  Initializations for the fft calculation */
 77     n2 = fftLen;
 78     n1 = n2;
 79     for (k = fftLen; k > 1U; k >>= 2U)
 80     {
 81        /*  Initializations for the fft calculation */
 82        n1 = n2;
 83        n2 >>= 2U;
 84        ia1 = 0U;
 85  
 86        /*  FFT Calculation */
 87        j = 0;
 88        do
 89        {
 90           /*  index calculation for the coefficients */
 91           ia2 = ia1 + ia1;
 92           ia3 = ia2 + ia1;
 93           co1 = pCoef[ia1 * 2U];
 94           si1 = pCoef[(ia1 * 2U) + 1U];
 95           co2 = pCoef[ia2 * 2U];
 96           si2 = pCoef[(ia2 * 2U) + 1U];
 97           co3 = pCoef[ia3 * 2U];
 98           si3 = pCoef[(ia3 * 2U) + 1U];
 99  
100           /*  Twiddle coefficients index modifier */
101           ia1 = ia1 + twidCoefModifier;
102  
103           i0 = j;
104           do
105           {
106              /*  index calculation for the input as, */
107              /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
108              i1 = i0 + n2;
109              i2 = i1 + n2;
110              i3 = i2 + n2;
111  
112              /* xa + xc */
113              r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
114  
115              /* xa - xc */
116              r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
117  
118              /* ya + yc */
119              s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
120  
121              /* ya - yc */
122              s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
123  
124              /* xb + xd */
125              t1 = pSrc[2U * i1] + pSrc[2U * i3];
126  
127              /* xa' = xa + xb + xc + xd */
128              pSrc[2U * i0] = r1 + t1;
129  
130              /* xa + xc -(xb + xd) */
131              r1 = r1 - t1;
132  
133              /* yb + yd */
134              t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
135  
136              /* ya' = ya + yb + yc + yd */
137              pSrc[(2U * i0) + 1U] = s1 + t2;
138  
139              /* (ya + yc) - (yb + yd) */
140              s1 = s1 - t2;
141  
142              /* (yb - yd) */
143              t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
144  
145              /* (xb - xd) */
146              t2 = pSrc[2U * i1] - pSrc[2U * i3];
147  
148              /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
149              pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
150  
151              /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
152              pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
153  
154              /* (xa - xc) + (yb - yd) */
155              r1 = r2 + t1;
156  
157              /* (xa - xc) - (yb - yd) */
158              r2 = r2 - t1;
159  
160              /* (ya - yc) -  (xb - xd) */
161              s1 = s2 - t2;
162  
163              /* (ya - yc) +  (xb - xd) */
164              s2 = s2 + t2;
165  
166              /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
167              pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
168  
169              /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
170              pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
171  
172              /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
173              pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
174  
175              /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
176              pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
177  
178              i0 += n1;
179           } while ( i0 < fftLen);
180           j++;
181        } while (j <= (n2 - 1U));
182        twidCoefModifier <<= 2U;
183     }
184  }
185  
186  /*
187  * @brief  Core function for the Double Precision floating-point CFFT butterfly process.
188  * @param[in, out] *pSrc            points to the in-place buffer of F64 data type.
189  * @param[in]      fftLen           length of the FFT.
190  * @param[in]      *pCoef           points to the twiddle coefficient buffer.
191  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
192  * @return none.
193  */
194  
195  void arm_cfft_radix4by2_f64(
196      float64_t * pSrc,
197      uint32_t fftLen,
198      const float64_t * pCoef)
199  {
200      uint32_t i, l;
201      uint32_t n2, ia;
202      float64_t xt, yt, cosVal, sinVal;
203      float64_t p0, p1,p2,p3,a0,a1;
204  
205      n2 = fftLen >> 1;
206      ia = 0;
207      for (i = 0; i < n2; i++)
208      {
209          cosVal = pCoef[2*ia];
210          sinVal = pCoef[2*ia + 1];
211          ia++;
212  
213          l = i + n2;
214  
215          /*  Butterfly implementation */
216          a0 = pSrc[2 * i] + pSrc[2 * l];
217          xt = pSrc[2 * i] - pSrc[2 * l];
218  
219          yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
220          a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
221  
222          p0 = xt * cosVal;
223          p1 = yt * sinVal;
224          p2 = yt * cosVal;
225          p3 = xt * sinVal;
226  
227          pSrc[2 * i]     = a0;
228          pSrc[2 * i + 1] = a1;
229  
230          pSrc[2 * l]     = p0 + p1;
231          pSrc[2 * l + 1] = p2 - p3;
232  
233      }
234  
235      // first col
236      arm_radix4_butterfly_f64( pSrc, n2, (float64_t*)pCoef, 2U);
237      // second col
238      arm_radix4_butterfly_f64( pSrc + fftLen, n2, (float64_t*)pCoef, 2U);
239  
240  }
241  
242  /**
243    @addtogroup ComplexFFT
244    @{
245   */
246  
247  /**
248    @brief         Processing function for the Double Precision floating-point complex FFT.
249    @param[in]     S              points to an instance of the Double Precision floating-point CFFT structure
250    @param[in,out] p1             points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
251    @param[in]     ifftFlag       flag that selects transform direction
252                     - value = 0: forward transform
253                     - value = 1: inverse transform
254    @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
255                     - value = 0: disables bit reversal of output
256                     - value = 1: enables bit reversal of output
257    @return        none
258   */
259  
260  void arm_cfft_f64(
261    const arm_cfft_instance_f64 * S,
262          float64_t * p1,
263          uint8_t ifftFlag,
264          uint8_t bitReverseFlag)
265  {
266      uint32_t  L = S->fftLen, l;
267      float64_t invL, * pSrc;
268  
269      if (ifftFlag == 1U)
270      {
271          /*  Conjugate input data  */
272          pSrc = p1 + 1;
273          for(l=0; l<L; l++)
274          {
275              *pSrc = -*pSrc;
276              pSrc += 2;
277          }
278      }
279  
280      switch (L)
281      {
282          case 16:
283          case 64:
284          case 256:
285          case 1024:
286          case 4096:
287          arm_radix4_butterfly_f64  (p1, L, (float64_t*)S->pTwiddle, 1U);
288          break;
289  
290          case 32:
291          case 128:
292          case 512:
293          case 2048:
294          arm_cfft_radix4by2_f64  ( p1, L, (float64_t*)S->pTwiddle);
295          break;
296  
297      }
298  
299      if ( bitReverseFlag )
300          arm_bitreversal_64((uint64_t*)p1, S->bitRevLength,S->pBitRevTable);
301  
302      if (ifftFlag == 1U)
303      {
304          invL = 1.0 / (float64_t)L;
305          /*  Conjugate and scale output data */
306          pSrc = p1;
307          for(l=0; l<L; l++)
308          {
309              *pSrc++ *=   invL ;
310              *pSrc  = -(*pSrc) * invL;
311              pSrc++;
312          }
313      }
314  }
315  
316  /**
317    @} end of ComplexFFT group
318   */