Cradicle Explorer

/ Drivers / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mult_real_q15.c
arm_cmplx_mult_real_q15.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cmplx_mult_real_q15.c
  4   * Description:  Q15 complex by real multiplication
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/complex_math_functions.h"
 30  
 31  /**
 32    @ingroup groupCmplxMath
 33   */
 34  
 35  /**
 36    @addtogroup CmplxByRealMult
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Q15 complex-by-real multiplication.
 42    @param[in]     pSrcCmplx   points to complex input vector
 43    @param[in]     pSrcReal    points to real input vector
 44    @param[out]    pCmplxDst   points to complex output vector
 45    @param[in]     numSamples  number of samples in each vector
 46    @return        none
 47  
 48    @par           Scaling and Overflow Behavior
 49                     The function uses saturating arithmetic.
 50                     Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
 51   */
 52  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 53  
 54  void arm_cmplx_mult_real_q15(
 55    const q15_t * pSrcCmplx,
 56    const q15_t * pSrcReal,
 57          q15_t * pCmplxDst,
 58          uint32_t numSamples)
 59  {
 60    static const uint16_t stride_cmplx_x_real_16[8] = {
 61        0, 0, 1, 1, 2, 2, 3, 3
 62        };
 63    q15x8_t rVec;
 64    q15x8_t cmplxVec;
 65    q15x8_t dstVec;
 66    uint16x8_t strideVec;
 67    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
 68    uint32_t blkCnt;
 69    q15_t in;  
 70  
 71    /*
 72    * stride vector for pairs of real generation
 73    */
 74    strideVec = vld1q(stride_cmplx_x_real_16);
 75  
 76    blkCnt = blockSizeC >> 3;
 77  
 78    while (blkCnt > 0U) 
 79    {
 80      cmplxVec = vld1q(pSrcCmplx);
 81      rVec = vldrhq_gather_shifted_offset_s16(pSrcReal, strideVec);
 82      dstVec = vqdmulhq(cmplxVec, rVec);
 83      vst1q(pCmplxDst, dstVec);
 84  
 85      pSrcReal += 4;
 86      pSrcCmplx += 8;
 87      pCmplxDst += 8;
 88      blkCnt --;
 89    }
 90  
 91    /* Tail */
 92    blkCnt = (blockSizeC & 7) >> 1;
 93    while (blkCnt > 0U)
 94    {
 95      /* C[2 * i    ] = A[2 * i    ] * B[i]. */
 96      /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
 97  
 98      in = *pSrcReal++;
 99      /* store the result in the destination buffer. */
100      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
101      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
102  
103      /* Decrement loop counter */
104      blkCnt--;
105    }
106  }
107  #else
108  void arm_cmplx_mult_real_q15(
109    const q15_t * pSrcCmplx,
110    const q15_t * pSrcReal,
111          q15_t * pCmplxDst,
112          uint32_t numSamples)
113  {
114          uint32_t blkCnt;                               /* Loop counter */
115          q15_t in;                                      /* Temporary variable */
116  
117  #if defined (ARM_MATH_LOOPUNROLL)
118  
119  #if defined (ARM_MATH_DSP)
120          q31_t inA1, inA2;                              /* Temporary variables to hold input data */
121          q31_t inB1;                                    /* Temporary variables to hold input data */
122          q15_t out1, out2, out3, out4;                  /* Temporary variables to hold output data */
123          q31_t mul1, mul2, mul3, mul4;                  /* Temporary variables to hold intermediate data */
124  #endif
125  
126    /* Loop unrolling: Compute 4 outputs at a time */
127    blkCnt = numSamples >> 2U;
128  
129    while (blkCnt > 0U)
130    {
131      /* C[2 * i    ] = A[2 * i    ] * B[i]. */
132      /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
133  
134  #if defined (ARM_MATH_DSP)
135      /* read 2 complex numbers both real and imaginary from complex input buffer */
136      inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
137      inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
138      /* read 2 real values at a time from real input buffer */
139      inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
140  
141      /* multiply complex number with real numbers */
142  #ifndef ARM_MATH_BIG_ENDIAN
143      mul1 = (q31_t) ((q15_t) (inA1)       * (q15_t) (inB1));
144      mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
145      mul3 = (q31_t) ((q15_t) (inA2)       * (q15_t) (inB1 >> 16));
146      mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
147  #else
148      mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
149      mul1 = (q31_t) ((q15_t) inA1         * (q15_t) (inB1 >> 16));
150      mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
151      mul3 = (q31_t) ((q15_t) inA2         * (q15_t) inB1);
152  #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
153  
154      /* saturate the result */
155      out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
156      out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
157      out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
158      out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
159  
160      /* pack real and imaginary outputs and store them to destination */
161      write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
162      write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
163  
164      inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
165      inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
166      inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
167  
168  #ifndef ARM_MATH_BIG_ENDIAN
169      mul1 = (q31_t) ((q15_t) (inA1)       * (q15_t) (inB1));
170      mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
171      mul3 = (q31_t) ((q15_t) (inA2)       * (q15_t) (inB1 >> 16));
172      mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
173  #else
174      mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
175      mul1 = (q31_t) ((q15_t) inA1         * (q15_t) (inB1 >> 16));
176      mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
177      mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
178  #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
179  
180      out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
181      out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
182      out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
183      out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
184  
185      write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
186      write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
187  #else
188      in = *pSrcReal++;
189      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
190      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
191  
192      in = *pSrcReal++;
193      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
194      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
195  
196      in = *pSrcReal++;
197      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
198      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
199  
200      in = *pSrcReal++;
201      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
202      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
203  #endif
204  
205      /* Decrement loop counter */
206      blkCnt--;
207    }
208  
209    /* Loop unrolling: Compute remaining outputs */
210    blkCnt = numSamples % 0x4U;
211  
212  #else
213  
214    /* Initialize blkCnt with number of samples */
215    blkCnt = numSamples;
216  
217  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
218  
219    while (blkCnt > 0U)
220    {
221      /* C[2 * i    ] = A[2 * i    ] * B[i]. */
222      /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
223  
224      in = *pSrcReal++;
225      /* store the result in the destination buffer. */
226      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
227      *pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
228  
229      /* Decrement loop counter */
230      blkCnt--;
231    }
232  
233  }
234  #endif /* defined(ARM_MATH_MVEI) */
235  
236  /**
237    @} end of CmplxByRealMult group
238   */