arm_cmplx_mag_fast_q15.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cmplx_mag_fast_q15.c 4 * Description: Q15 complex magnitude 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/complex_math_functions.h" 30 31 /** 32 @ingroup groupCmplxMath 33 */ 34 35 /** 36 @addtogroup cmplx_mag 37 @{ 38 */ 39 40 /** 41 @brief Q15 complex magnitude. 42 @param[in] pSrc points to input vector 43 @param[out] pDst points to output vector 44 @param[in] numSamples number of samples in each vector 45 @return none 46 47 @par Scaling and Overflow Behavior 48 The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format. 49 Fast functions are less accurate. This function will tend to clamp to 0 50 the too small values. So sqrt(x*x) = x will not always be true. 51 */ 52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 53 54 #include "arm_helium_utils.h" 55 56 void arm_cmplx_mag_fast_q15( 57 const q15_t * pSrc, 58 q15_t * pDst, 59 uint32_t numSamples) 60 { 61 62 int32_t blockSize = numSamples; /* loop counters */ 63 uint32_t blkCnt; /* loop counters */ 64 q15x8x2_t vecSrc; 65 q15x8_t sum; 66 q31_t in; 67 q31_t acc0; 68 69 blkCnt = blockSize >> 3; 70 while (blkCnt > 0U) 71 { 72 vecSrc = vld2q(pSrc); 73 pSrc += 16; 74 sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]), 75 vmulhq(vecSrc.val[1], vecSrc.val[1])); 76 77 sum = vshrq(sum, 1); 78 79 sum = FAST_VSQRT_Q15(sum); 80 81 vst1q(pDst, sum); 82 pDst += 8; 83 /* 84 * Decrement the blockSize loop counter 85 */ 86 blkCnt--; 87 } 88 89 /* 90 * tail 91 */ 92 blkCnt = blockSize & 7; 93 94 while (blkCnt > 0U) 95 { 96 /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ 97 98 in = read_q15x2_ia ((q15_t **) &pSrc); 99 acc0 = __SMUAD(in, in); 100 101 /* store result in 2.14 format in destination buffer. */ 102 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 103 104 105 /* Decrement loop counter */ 106 blkCnt--; 107 } 108 } 109 110 #else 111 void arm_cmplx_mag_fast_q15( 112 const q15_t * pSrc, 113 q15_t * pDst, 114 uint32_t numSamples) 115 { 116 uint32_t blkCnt; /* Loop counter */ 117 118 #if defined (ARM_MATH_DSP) 119 q31_t in; 120 q31_t acc0; /* Accumulators */ 121 #else 122 q15_t real, imag; /* Temporary input variables */ 123 q31_t acc0, acc1; /* Accumulators */ 124 #endif 125 126 #if defined (ARM_MATH_LOOPUNROLL) 127 128 /* Loop unrolling: Compute 4 outputs at a time */ 129 blkCnt = numSamples >> 2U; 130 131 while (blkCnt > 0U) 132 { 133 /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ 134 135 #if defined (ARM_MATH_DSP) 136 in = read_q15x2_ia (&pSrc); 137 acc0 = __SMUAD(in, in); 138 /* store result in 2.14 format in destination buffer. */ 139 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 140 141 in = read_q15x2_ia (&pSrc); 142 acc0 = __SMUAD(in, in); 143 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 144 145 in = read_q15x2_ia (&pSrc); 146 acc0 = __SMUAD(in, in); 147 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 148 149 in = read_q15x2_ia (&pSrc); 150 acc0 = __SMUAD(in, in); 151 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 152 #else 153 real = *pSrc++; 154 imag = *pSrc++; 155 acc0 = ((q31_t) real * real); 156 acc1 = ((q31_t) imag * imag); 157 158 /* store result in 2.14 format in destination buffer. */ 159 arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); 160 161 real = *pSrc++; 162 imag = *pSrc++; 163 acc0 = ((q31_t) real * real); 164 acc1 = ((q31_t) imag * imag); 165 arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); 166 167 real = *pSrc++; 168 imag = *pSrc++; 169 acc0 = ((q31_t) real * real); 170 acc1 = ((q31_t) imag * imag); 171 arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); 172 173 real = *pSrc++; 174 imag = *pSrc++; 175 acc0 = ((q31_t) real * real); 176 acc1 = ((q31_t) imag * imag); 177 arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); 178 #endif /* #if defined (ARM_MATH_DSP) */ 179 180 /* Decrement loop counter */ 181 blkCnt--; 182 } 183 184 /* Loop unrolling: Compute remaining outputs */ 185 blkCnt = numSamples % 0x4U; 186 187 #else 188 189 /* Initialize blkCnt with number of samples */ 190 blkCnt = numSamples; 191 192 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 193 194 while (blkCnt > 0U) 195 { 196 /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ 197 198 #if defined (ARM_MATH_DSP) 199 in = read_q15x2_ia (&pSrc); 200 acc0 = __SMUAD(in, in); 201 202 /* store result in 2.14 format in destination buffer. */ 203 arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); 204 #else 205 real = *pSrc++; 206 imag = *pSrc++; 207 acc0 = ((q31_t) real * real); 208 acc1 = ((q31_t) imag * imag); 209 210 /* store result in 2.14 format in destination buffer. */ 211 arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); 212 #endif 213 214 /* Decrement loop counter */ 215 blkCnt--; 216 } 217 218 } 219 #endif /* defined(ARM_MATH_MVEI) */ 220 221 /** 222 @} end of cmplx_mag group 223 */