arm_cmplx_mag_squared_q31.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cmplx_mag_squared_q31.c 4 * Description: Q31 complex magnitude squared 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/complex_math_functions.h" 30 31 /** 32 @ingroup groupCmplxMath 33 */ 34 35 /** 36 @addtogroup cmplx_mag_squared 37 @{ 38 */ 39 40 /** 41 @brief Q31 complex magnitude squared. 42 @param[in] pSrc points to input vector 43 @param[out] pDst points to output vector 44 @param[in] numSamples number of samples in each vector 45 @return none 46 47 @par Scaling and Overflow Behavior 48 The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format. 49 Input down scaling is not required. 50 */ 51 52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 53 54 void arm_cmplx_mag_squared_q31( 55 const q31_t * pSrc, 56 q31_t * pDst, 57 uint32_t numSamples) 58 { 59 int32_t blockSize = numSamples; /* loop counters */ 60 uint32_t blkCnt; /* loop counters */ 61 q31x4x2_t vecSrc; 62 q31x4_t vReal, vImag; 63 q31x4_t vMagSq; 64 q31_t real, imag; /* Temporary input variables */ 65 q31_t acc0, acc1; /* Accumulators */ 66 67 /* Compute 4 complex samples at a time */ 68 blkCnt = blockSize >> 2; 69 while (blkCnt > 0U) 70 { 71 vecSrc = vld2q(pSrc); 72 vReal = vmulhq(vecSrc.val[0], vecSrc.val[0]); 73 vImag = vmulhq(vecSrc.val[1], vecSrc.val[1]); 74 vMagSq = vqaddq(vReal, vImag); 75 vMagSq = vshrq(vMagSq, 1); 76 77 vst1q(pDst, vMagSq); 78 79 pSrc += 8; 80 pDst += 4; 81 /* 82 * Decrement the blkCnt loop counter 83 * Advance vector source and destination pointers 84 */ 85 blkCnt --; 86 } 87 88 /* Tail */ 89 blkCnt = blockSize & 3; 90 while (blkCnt > 0U) 91 { 92 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ 93 94 real = *pSrc++; 95 imag = *pSrc++; 96 acc0 = (q31_t) (((q63_t) real * real) >> 33); 97 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 98 99 /* store result in 3.29 format in destination buffer. */ 100 *pDst++ = acc0 + acc1; 101 102 /* Decrement loop counter */ 103 blkCnt--; 104 } 105 } 106 107 #else 108 void arm_cmplx_mag_squared_q31( 109 const q31_t * pSrc, 110 q31_t * pDst, 111 uint32_t numSamples) 112 { 113 uint32_t blkCnt; /* Loop counter */ 114 q31_t real, imag; /* Temporary input variables */ 115 q31_t acc0, acc1; /* Accumulators */ 116 117 #if defined (ARM_MATH_LOOPUNROLL) 118 119 /* Loop unrolling: Compute 4 outputs at a time */ 120 blkCnt = numSamples >> 2U; 121 122 while (blkCnt > 0U) 123 { 124 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ 125 126 real = *pSrc++; 127 imag = *pSrc++; 128 acc0 = (q31_t) (((q63_t) real * real) >> 33); 129 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 130 /* store the result in 3.29 format in the destination buffer. */ 131 *pDst++ = acc0 + acc1; 132 133 real = *pSrc++; 134 imag = *pSrc++; 135 acc0 = (q31_t) (((q63_t) real * real) >> 33); 136 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 137 *pDst++ = acc0 + acc1; 138 139 real = *pSrc++; 140 imag = *pSrc++; 141 acc0 = (q31_t) (((q63_t) real * real) >> 33); 142 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 143 *pDst++ = acc0 + acc1; 144 145 real = *pSrc++; 146 imag = *pSrc++; 147 acc0 = (q31_t) (((q63_t) real * real) >> 33); 148 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 149 *pDst++ = acc0 + acc1; 150 151 /* Decrement loop counter */ 152 blkCnt--; 153 } 154 155 /* Loop unrolling: Compute remaining outputs */ 156 blkCnt = numSamples % 0x4U; 157 158 #else 159 160 /* Initialize blkCnt with number of samples */ 161 blkCnt = numSamples; 162 163 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 164 165 while (blkCnt > 0U) 166 { 167 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ 168 169 real = *pSrc++; 170 imag = *pSrc++; 171 acc0 = (q31_t) (((q63_t) real * real) >> 33); 172 acc1 = (q31_t) (((q63_t) imag * imag) >> 33); 173 174 /* store result in 3.29 format in destination buffer. */ 175 *pDst++ = acc0 + acc1; 176 177 /* Decrement loop counter */ 178 blkCnt--; 179 } 180 181 } 182 183 #endif /* defined(ARM_MATH_MVEI) */ 184 185 /** 186 @} end of cmplx_mag_squared group 187 */