/ Drivers / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mag_squared_f16.c
arm_cmplx_mag_squared_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cmplx_mag_squared_f16.c
  4   * Description:  Floating-point complex magnitude squared
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/complex_math_functions_f16.h"
 30  
 31  #if defined(ARM_FLOAT16_SUPPORTED)
 32  
 33  /**
 34    @ingroup groupCmplxMath
 35   */
 36  
 37  /**
 38    @defgroup cmplx_mag_squared Complex Magnitude Squared
 39  
 40    Computes the magnitude squared of the elements of a complex data vector.
 41  
 42    The <code>pSrc</code> points to the source data and
  <code>pDst</code> points to where the result should be written.
 44    <code>numSamples</code> specifies the number of complex samples
 45    in the input array and the data is stored in an interleaved fashion
 46    (real, imag, real, imag, ...).
 47    The input array has a total of <code>2*numSamples</code> values;
 48    the output array has a total of <code>numSamples</code> values.
 49  
  The underlying algorithm is:
 51  
 52    <pre>
 53    for (n = 0; n < numSamples; n++) {
 54        pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
 55    }
 56    </pre>
 57  
 58    There are separate functions for floating-point, Q15, and Q31 data types.
 59   */
 60  
 61  /**
 62    @addtogroup cmplx_mag_squared
 63    @{
 64   */
 65  
 66  /**
 67    @brief         Floating-point complex magnitude squared.
 68    @param[in]     pSrc        points to input vector
 69    @param[out]    pDst        points to output vector
  @param[in]     numSamples  number of complex samples in the input vector (and of values written to the output vector)
 71    @return        none
 72   */
 73  
 74  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 75  
 76  void arm_cmplx_mag_squared_f16(
 77    const float16_t * pSrc,
 78          float16_t * pDst,
 79          uint32_t numSamples)
 80  {
 81      int32_t blockSize = numSamples;  /* loop counters */
 82      f16x8x2_t vecSrc;
 83      f16x8_t sum;
 84  
 85      /* Compute 4 complex samples at a time */
 86      while (blockSize > 0)
 87      {
 88          mve_pred16_t p = vctp16q(blockSize);
 89          vecSrc = vld2q(pSrc);
 90          sum = vmulq_m(vuninitializedq_f16(),vecSrc.val[0], vecSrc.val[0],p);
 91          sum = vfmaq_m(sum, vecSrc.val[1], vecSrc.val[1],p);
 92          vstrhq_p_f16(pDst, sum,p);
 93  
 94          pSrc += 16;
 95          pDst += 8;
 96          
 97          /*
 98           * Decrement the blockSize loop counter
 99           */
100          blockSize-= 8;
101      }
102  
103  }
104  
105  #else
106  void arm_cmplx_mag_squared_f16(
107    const float16_t * pSrc,
108          float16_t * pDst,
109          uint32_t numSamples)
110  {
111          uint32_t blkCnt;                               /* Loop counter */
112          _Float16 real, imag;                          /* Temporary input variables */
113  
114  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
115  
116    /* Loop unrolling: Compute 4 outputs at a time */
117    blkCnt = numSamples >> 2U;
118  
119    while (blkCnt > 0U)
120    {
121      /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
122  
123      real = *pSrc++;
124      imag = *pSrc++;
125      *pDst++ = (real * real) + (imag * imag);
126  
127      real = *pSrc++;
128      imag = *pSrc++;
129      *pDst++ = (real * real) + (imag * imag);
130  
131      real = *pSrc++;
132      imag = *pSrc++;
133      *pDst++ = (real * real) + (imag * imag);
134  
135      real = *pSrc++;
136      imag = *pSrc++;
137      *pDst++ = (real * real) + (imag * imag);
138  
139      /* Decrement loop counter */
140      blkCnt--;
141    }
142  
143    /* Loop unrolling: Compute remaining outputs */
144    blkCnt = numSamples % 0x4U;
145  
146  #else
147  
148    /* Initialize blkCnt with number of samples */
149    blkCnt = numSamples;
150  
151  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
152  
153    while (blkCnt > 0U)
154    {
155      /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
156  
157      real = *pSrc++;
158      imag = *pSrc++;
159  
160      /* store result in destination buffer. */
161      *pDst++ = (real * real) + (imag * imag);
162  
163      /* Decrement loop counter */
164      blkCnt--;
165    }
166  
167  }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
169  
170  /**
171    @} end of cmplx_mag_squared group
172   */
173  
174  #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */