/ Drivers / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mag_squared_f32.c
arm_cmplx_mag_squared_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cmplx_mag_squared_f32.c
  4   * Description:  Floating-point complex magnitude squared
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/complex_math_functions.h"
 30  
 31  /**
 32    @ingroup groupCmplxMath
 33   */
 34  
 35  /**
 36    @defgroup cmplx_mag_squared Complex Magnitude Squared
 37  
 38    Computes the magnitude squared of the elements of a complex data vector.
 39  
 40    The <code>pSrc</code> points to the source data and
  41    <code>pDst</code> points to where the result should be written.
 42    <code>numSamples</code> specifies the number of complex samples
 43    in the input array and the data is stored in an interleaved fashion
 44    (real, imag, real, imag, ...).
 45    The input array has a total of <code>2*numSamples</code> values;
 46    the output array has a total of <code>numSamples</code> values.
 47  
  48    The underlying algorithm is:
 49  
 50    <pre>
 51    for (n = 0; n < numSamples; n++) {
 52        pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
 53    }
 54    </pre>
 55  
 56    There are separate functions for floating-point, Q15, and Q31 data types.
 57   */
 58  
 59  /**
 60    @addtogroup cmplx_mag_squared
 61    @{
 62   */
 63  
 64  /**
 65    @brief         Floating-point complex magnitude squared.
 66    @param[in]     pSrc        points to input vector
 67    @param[out]    pDst        points to output vector
 68    @param[in]     numSamples  number of samples in each vector
 69    @return        none
 70   */
 71  
 72  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 73  
 74  void arm_cmplx_mag_squared_f32(
 75    const float32_t * pSrc,
 76          float32_t * pDst,
 77          uint32_t numSamples)
 78  {
 79      int32_t blockSize = numSamples;  /* loop counters */
 80      uint32_t  blkCnt;           /* loop counters */
 81      f32x4x2_t vecSrc;
 82      f32x4_t sum;
 83      float32_t real, imag;                          /* Temporary input variables */
 84  
 85      /* Compute 4 complex samples at a time */
 86      blkCnt = blockSize >> 2;
 87      while (blkCnt > 0U)
 88      {
 89          vecSrc = vld2q(pSrc);
 90          sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
 91          sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
 92          vst1q(pDst, sum);
 93  
 94          pSrc += 8;
 95          pDst += 4;
 96          
 97          /*
 98           * Decrement the blockSize loop counter
 99           */
100          blkCnt--;
101      }
102  
103      /* Tail */
104      blkCnt = blockSize & 3;
105      while (blkCnt > 0U)
106      {
107        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
108    
109        real = *pSrc++;
110        imag = *pSrc++;
111    
112        /* store result in destination buffer. */
113        *pDst++ = (real * real) + (imag * imag);
114    
115        /* Decrement loop counter */
116        blkCnt--;
117      }
118  
119  }
120  
121  #else
122  void arm_cmplx_mag_squared_f32(
123    const float32_t * pSrc,
124          float32_t * pDst,
125          uint32_t numSamples)
126  {
127          uint32_t blkCnt;                               /* Loop counter */
128          float32_t real, imag;                          /* Temporary input variables */
129  
130  #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
131    float32x4x2_t vecA;
132    float32x4_t vRealA;
133    float32x4_t vImagA;
134    float32x4_t vMagSqA;
135  
136    float32x4x2_t vecB;
137    float32x4_t vRealB;
138    float32x4_t vImagB;
139    float32x4_t vMagSqB;
140  
141    /* Loop unrolling: Compute 8 outputs at a time */
142    blkCnt = numSamples >> 3;
143  
144    while (blkCnt > 0U)
145    {
146      /* out = sqrt((real * real) + (imag * imag)) */
147  
148      vecA = vld2q_f32(pSrc);
149      pSrc += 8;
150  
151      vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
152      vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
153      vMagSqA = vaddq_f32(vRealA, vImagA);
154  
155      vecB = vld2q_f32(pSrc);
156      pSrc += 8;
157  
158      vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
159      vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
160      vMagSqB = vaddq_f32(vRealB, vImagB);
161  
162      /* Store the result in the destination buffer. */
163      vst1q_f32(pDst, vMagSqA);
164      pDst += 4;
165  
166      vst1q_f32(pDst, vMagSqB);
167      pDst += 4;
168  
169      /* Decrement the loop counter */
170      blkCnt--;
171    }
172  
173    blkCnt = numSamples & 7;
174  
175  #else
176  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
177  
178    /* Loop unrolling: Compute 4 outputs at a time */
179    blkCnt = numSamples >> 2U;
180  
181    while (blkCnt > 0U)
182    {
183      /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
184  
185      real = *pSrc++;
186      imag = *pSrc++;
187      *pDst++ = (real * real) + (imag * imag);
188  
189      real = *pSrc++;
190      imag = *pSrc++;
191      *pDst++ = (real * real) + (imag * imag);
192  
193      real = *pSrc++;
194      imag = *pSrc++;
195      *pDst++ = (real * real) + (imag * imag);
196  
197      real = *pSrc++;
198      imag = *pSrc++;
199      *pDst++ = (real * real) + (imag * imag);
200  
201      /* Decrement loop counter */
202      blkCnt--;
203    }
204  
205    /* Loop unrolling: Compute remaining outputs */
206    blkCnt = numSamples % 0x4U;
207  
208  #else
209  
210    /* Initialize blkCnt with number of samples */
211    blkCnt = numSamples;
212  
213  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
214  #endif /* #if defined(ARM_MATH_NEON) */
215  
216    while (blkCnt > 0U)
217    {
218      /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
219  
220      real = *pSrc++;
221      imag = *pSrc++;
222  
223      /* store result in destination buffer. */
224      *pDst++ = (real * real) + (imag * imag);
225  
226      /* Decrement loop counter */
227      blkCnt--;
228    }
229  
230  }
231  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
232  
233  /**
234    @} end of cmplx_mag_squared group
235   */