Cradicle Explorer

/ Drivers / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mag_fast_q15.c
arm_cmplx_mag_fast_q15.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cmplx_mag_fast_q15.c
  4   * Description:  Q15 complex magnitude
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/complex_math_functions.h"
 30  
 31  /**
 32    @ingroup groupCmplxMath
 33   */
 34  
 35  /**
 36    @addtogroup cmplx_mag
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Q15 complex magnitude.
 42    @param[in]     pSrc        points to input vector
 43    @param[out]    pDst        points to output vector
 44    @param[in]     numSamples  number of samples in each vector
 45    @return        none
 46  
 47    @par           Scaling and Overflow Behavior
 48                     The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
 49                     Fast functions are less accurate. This function tends to clamp values
 50                     that are too small to 0, so sqrt(x*x) = x will not always hold.
 51   */
 52  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 53  
 54  #include "arm_helium_utils.h"
 55  
 56  void arm_cmplx_mag_fast_q15(
 57    const q15_t * pSrc,
 58          q15_t * pDst,
 59          uint32_t numSamples)
 60  {
 61  
 62      int32_t blockSize = numSamples;  /* loop counters */
 63      uint32_t  blkCnt;           /* loop counters */
 64      q15x8x2_t vecSrc;
 65      q15x8_t sum;
 66      q31_t in;
 67      q31_t acc0;
 68  
 69      blkCnt = blockSize >> 3;
 70      while (blkCnt > 0U)
 71      {
 72          vecSrc = vld2q(pSrc);  
 73          pSrc += 16;
 74          sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
 75                       vmulhq(vecSrc.val[1], vecSrc.val[1]));
 76  
 77          sum = vshrq(sum, 1);
 78  
 79          sum = FAST_VSQRT_Q15(sum);
 80  
 81          vst1q(pDst, sum); 
 82          pDst += 8;
 83          /*
 84           * Decrement the blockSize loop counter
 85           */
 86          blkCnt--;
 87      }
 88  
 89      /*
 90       * tail
 91       */
 92      blkCnt = blockSize & 7;
 93  
 94      while (blkCnt > 0U)
 95      {
 96        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
 97    
 98        in = read_q15x2_ia ((q15_t **) &pSrc);
 99        acc0 = __SMUAD(in, in);
100    
101        /* store result in 2.14 format in destination buffer. */
102        arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
103    
104    
105        /* Decrement loop counter */
106        blkCnt--;
107      }
108  }
109  
110  #else
111  void arm_cmplx_mag_fast_q15(
112    const q15_t * pSrc,
113          q15_t * pDst,
114          uint32_t numSamples)
115  {
116          uint32_t blkCnt;                               /* Loop counter */
117  
118  #if defined (ARM_MATH_DSP)
119          q31_t in;
120          q31_t acc0;                                    /* Accumulators */
121  #else
122         q15_t real, imag;                              /* Temporary input variables */
123         q31_t acc0, acc1;                              /* Accumulators */
124  #endif
125  
126  #if defined (ARM_MATH_LOOPUNROLL)
127  
128    /* Loop unrolling: Compute 4 outputs at a time */
129    blkCnt = numSamples >> 2U;
130  
131    while (blkCnt > 0U)
132    {
133      /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
134  
135  #if defined (ARM_MATH_DSP)
136      in = read_q15x2_ia (&pSrc);
137      acc0 = __SMUAD(in, in);
138      /* store result in 2.14 format in destination buffer. */
139      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
140  
141      in = read_q15x2_ia (&pSrc);
142      acc0 = __SMUAD(in, in);
143      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
144  
145      in = read_q15x2_ia (&pSrc);
146      acc0 = __SMUAD(in, in);
147      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
148  
149      in = read_q15x2_ia (&pSrc);
150      acc0 = __SMUAD(in, in);
151      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
152  #else
153      real = *pSrc++;
154      imag = *pSrc++;
155      acc0 = ((q31_t) real * real);
156      acc1 = ((q31_t) imag * imag);
157  
158      /* store result in 2.14 format in destination buffer. */
159      arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
160  
161      real = *pSrc++;
162      imag = *pSrc++;
163      acc0 = ((q31_t) real * real);
164      acc1 = ((q31_t) imag * imag);
165      arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
166  
167      real = *pSrc++;
168      imag = *pSrc++;
169      acc0 = ((q31_t) real * real);
170      acc1 = ((q31_t) imag * imag);
171      arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
172  
173      real = *pSrc++;
174      imag = *pSrc++;
175      acc0 = ((q31_t) real * real);
176      acc1 = ((q31_t) imag * imag);
177      arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
178  #endif /* #if defined (ARM_MATH_DSP) */
179  
180      /* Decrement loop counter */
181      blkCnt--;
182    }
183  
184    /* Loop unrolling: Compute remaining outputs */
185    blkCnt = numSamples % 0x4U;
186  
187  #else
188  
189    /* Initialize blkCnt with number of samples */
190    blkCnt = numSamples;
191  
192  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
193  
194    while (blkCnt > 0U)
195    {
196      /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
197  
198  #if defined (ARM_MATH_DSP)
199      in = read_q15x2_ia (&pSrc);
200      acc0 = __SMUAD(in, in);
201  
202      /* store result in 2.14 format in destination buffer. */
203      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
204  #else
205      real = *pSrc++;
206      imag = *pSrc++;
207      acc0 = ((q31_t) real * real);
208      acc1 = ((q31_t) imag * imag);
209  
210      /* store result in 2.14 format in destination buffer. */
211      arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
212  #endif
213  
214      /* Decrement loop counter */
215      blkCnt--;
216    }
217  
218  }
219  #endif /* defined(ARM_MATH_MVEI) */
220  
221  /**
222    @} end of cmplx_mag group
223   */