arm_cmplx_mult_real_q31.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cmplx_mult_real_q31.c 4 * Description: Q31 complex by real multiplication 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/complex_math_functions.h" 30 31 /** 32 @ingroup groupCmplxMath 33 */ 34 35 /** 36 @addtogroup CmplxByRealMult 37 @{ 38 */ 39 40 /** 41 @brief Q31 complex-by-real multiplication. 42 @param[in] pSrcCmplx points to complex input vector 43 @param[in] pSrcReal points to real input vector 44 @param[out] pCmplxDst points to complex output vector 45 @param[in] numSamples number of samples in each vector 46 @return none 47 48 @par Scaling and Overflow Behavior 49 The function uses saturating arithmetic. 50 Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated. 51 */ 52 53 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 54 void arm_cmplx_mult_real_q31( 55 const q31_t * pSrcCmplx, 56 const q31_t * pSrcReal, 57 q31_t * pCmplxDst, 58 uint32_t numSamples) 59 { 60 61 static const uint32_t stride_cmplx_x_real_32[4] = { 62 0, 0, 1, 1 63 }; 64 q31x4_t rVec; 65 q31x4_t cmplxVec; 66 q31x4_t dstVec; 67 uint32x4_t strideVec; 68 uint32_t blockSizeC = numSamples * CMPLX_DIM; /* loop counters */ 69 uint32_t blkCnt; 70 q31_t in; 71 72 /* 73 * stride vector for pairs of real generation 74 */ 75 strideVec = vld1q(stride_cmplx_x_real_32); 76 77 /* Compute 4 complex outputs at a time */ 78 blkCnt = blockSizeC >> 2; 79 while (blkCnt > 0U) 80 { 81 cmplxVec = vld1q(pSrcCmplx); 82 rVec = vldrwq_gather_shifted_offset_s32(pSrcReal, strideVec); 83 dstVec = vqdmulhq(cmplxVec, rVec); 84 vst1q(pCmplxDst, dstVec); 85 86 pSrcReal += 2; 87 pSrcCmplx += 4; 88 pCmplxDst += 4; 89 blkCnt --; 90 } 91 92 blkCnt = (blockSizeC & 3) >> 1; 93 while (blkCnt > 0U) 94 { 95 /* C[2 * i ] = A[2 * i ] * B[i]. */ 96 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 97 98 in = *pSrcReal++; 99 /* store saturated result in 1.31 format to destination buffer */ 100 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 101 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 102 103 /* Decrement loop counter */ 104 blkCnt--; 105 } 106 } 107 #else 108 void arm_cmplx_mult_real_q31( 109 const q31_t * pSrcCmplx, 110 const q31_t * pSrcReal, 111 q31_t * pCmplxDst, 112 uint32_t numSamples) 113 { 114 uint32_t blkCnt; /* Loop counter */ 115 q31_t in; /* Temporary variable */ 116 117 #if defined (ARM_MATH_LOOPUNROLL) 118 119 /* Loop unrolling: Compute 4 outputs at a time */ 120 blkCnt = numSamples >> 2U; 121 122 while (blkCnt > 0U) 123 { 124 /* C[2 * i ] = A[2 * i ] * B[i]. */ 125 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 126 127 in = *pSrcReal++; 128 #if defined (ARM_MATH_DSP) 129 /* store saturated result in 1.31 format to destination buffer */ 130 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 131 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 132 #else 133 /* store result in destination buffer. */ 134 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 135 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 136 #endif 137 138 in = *pSrcReal++; 139 #if defined (ARM_MATH_DSP) 140 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 141 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 142 #else 143 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 144 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 145 #endif 146 147 in = *pSrcReal++; 148 #if defined (ARM_MATH_DSP) 149 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 150 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 151 #else 152 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 153 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 154 #endif 155 156 in = *pSrcReal++; 157 #if defined (ARM_MATH_DSP) 158 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 159 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 160 #else 161 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 162 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 163 #endif 164 165 /* Decrement loop counter */ 166 blkCnt--; 167 } 168 169 /* Loop unrolling: Compute remaining outputs */ 170 blkCnt = numSamples % 0x4U; 171 172 #else 173 174 /* Initialize blkCnt with number of samples */ 175 blkCnt = numSamples; 176 177 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 178 179 while (blkCnt > 0U) 180 { 181 /* C[2 * i ] = A[2 * i ] * B[i]. */ 182 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 183 184 in = *pSrcReal++; 185 #if defined (ARM_MATH_DSP) 186 /* store saturated result in 1.31 format to destination buffer */ 187 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 188 *pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1); 189 #else 190 /* store result in destination buffer. */ 191 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 192 *pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31); 193 #endif 194 195 /* Decrement loop counter */ 196 blkCnt--; 197 } 198 199 } 200 #endif /* defined(ARM_MATH_MVEI) */ 201 202 /** 203 @} end of CmplxByRealMult group 204 */