arm_cmplx_mult_real_q15.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cmplx_mult_real_q15.c 4 * Description: Q15 complex by real multiplication 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/complex_math_functions.h" 30 31 /** 32 @ingroup groupCmplxMath 33 */ 34 35 /** 36 @addtogroup CmplxByRealMult 37 @{ 38 */ 39 40 /** 41 @brief Q15 complex-by-real multiplication. 42 @param[in] pSrcCmplx points to complex input vector 43 @param[in] pSrcReal points to real input vector 44 @param[out] pCmplxDst points to complex output vector 45 @param[in] numSamples number of samples in each vector 46 @return none 47 48 @par Scaling and Overflow Behavior 49 The function uses saturating arithmetic. 50 Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. 51 */ 52 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 53 54 void arm_cmplx_mult_real_q15( 55 const q15_t * pSrcCmplx, 56 const q15_t * pSrcReal, 57 q15_t * pCmplxDst, 58 uint32_t numSamples) 59 { 60 static const uint16_t stride_cmplx_x_real_16[8] = { 61 0, 0, 1, 1, 2, 2, 3, 3 62 }; 63 q15x8_t rVec; 64 q15x8_t cmplxVec; 65 q15x8_t dstVec; 66 uint16x8_t strideVec; 67 uint32_t blockSizeC = numSamples * CMPLX_DIM; /* loop counters */ 68 uint32_t blkCnt; 69 q15_t in; 70 71 /* 72 * stride vector for pairs of real generation 73 */ 74 strideVec = vld1q(stride_cmplx_x_real_16); 75 76 blkCnt = blockSizeC >> 3; 77 78 while (blkCnt > 0U) 79 { 80 cmplxVec = vld1q(pSrcCmplx); 81 rVec = vldrhq_gather_shifted_offset_s16(pSrcReal, strideVec); 82 dstVec = vqdmulhq(cmplxVec, rVec); 83 vst1q(pCmplxDst, dstVec); 84 85 pSrcReal += 4; 86 pSrcCmplx += 8; 87 pCmplxDst += 8; 88 blkCnt --; 89 } 90 91 /* Tail */ 92 blkCnt = (blockSizeC & 7) >> 1; 93 while (blkCnt > 0U) 94 { 95 /* C[2 * i ] = A[2 * i ] * B[i]. */ 96 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 97 98 in = *pSrcReal++; 99 /* store the result in the destination buffer. */ 100 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 101 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 102 103 /* Decrement loop counter */ 104 blkCnt--; 105 } 106 } 107 #else 108 void arm_cmplx_mult_real_q15( 109 const q15_t * pSrcCmplx, 110 const q15_t * pSrcReal, 111 q15_t * pCmplxDst, 112 uint32_t numSamples) 113 { 114 uint32_t blkCnt; /* Loop counter */ 115 q15_t in; /* Temporary variable */ 116 117 #if defined (ARM_MATH_LOOPUNROLL) 118 119 #if defined (ARM_MATH_DSP) 120 q31_t inA1, inA2; /* Temporary variables to hold input data */ 121 q31_t inB1; /* Temporary variables to hold input data */ 122 q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */ 123 q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */ 124 #endif 125 126 /* Loop unrolling: Compute 4 outputs at a time */ 127 blkCnt = numSamples >> 2U; 128 129 while (blkCnt > 0U) 130 { 131 /* C[2 * i ] = A[2 * i ] * B[i]. */ 132 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 133 134 #if defined (ARM_MATH_DSP) 135 /* read 2 complex numbers both real and imaginary from complex input buffer */ 136 inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx); 137 inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx); 138 /* read 2 real values at a time from real input buffer */ 139 inB1 = read_q15x2_ia ((q15_t **) &pSrcReal); 140 141 /* multiply complex number with real numbers */ 142 #ifndef ARM_MATH_BIG_ENDIAN 143 mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1)); 144 mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1)); 145 mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16)); 146 mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16)); 147 #else 148 mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); 149 mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16)); 150 mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1); 151 mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1); 152 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 153 154 /* saturate the result */ 155 out1 = (q15_t) __SSAT(mul1 >> 15U, 16); 156 out2 = (q15_t) __SSAT(mul2 >> 15U, 16); 157 out3 = (q15_t) __SSAT(mul3 >> 15U, 16); 158 out4 = (q15_t) __SSAT(mul4 >> 15U, 16); 159 160 /* pack real and imaginary outputs and store them to destination */ 161 write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16)); 162 write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16)); 163 164 inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx); 165 inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx); 166 inB1 = read_q15x2_ia ((q15_t **) &pSrcReal); 167 168 #ifndef ARM_MATH_BIG_ENDIAN 169 mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1)); 170 mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1)); 171 mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16)); 172 mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16)); 173 #else 174 mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); 175 mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16)); 176 mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1); 177 mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1); 178 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 179 180 out1 = (q15_t) __SSAT(mul1 >> 15U, 16); 181 out2 = (q15_t) __SSAT(mul2 >> 15U, 16); 182 out3 = (q15_t) __SSAT(mul3 >> 15U, 16); 183 out4 = (q15_t) __SSAT(mul4 >> 15U, 16); 184 185 write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16)); 186 write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16)); 187 #else 188 in = *pSrcReal++; 189 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 190 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 191 192 in = *pSrcReal++; 193 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 194 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 195 196 in = *pSrcReal++; 197 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 198 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 199 200 in = *pSrcReal++; 201 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 202 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 203 #endif 204 205 /* Decrement loop counter */ 206 blkCnt--; 207 } 208 209 /* Loop unrolling: Compute remaining outputs */ 210 blkCnt = numSamples % 0x4U; 211 212 #else 213 214 /* Initialize blkCnt with number of samples */ 215 blkCnt = numSamples; 216 217 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 218 219 while (blkCnt > 0U) 220 { 221 /* C[2 * i ] = A[2 * i ] * B[i]. */ 222 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 223 224 in = *pSrcReal++; 225 /* store the result in the destination buffer. */ 226 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 227 *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16); 228 229 /* Decrement loop counter */ 230 blkCnt--; 231 } 232 233 } 234 #endif /* defined(ARM_MATH_MVEI) */ 235 236 /** 237 @} end of CmplxByRealMult group 238 */