Cradicle Explorer

/ Drivers / CMSIS / DSP / Source / ComplexMathFunctions / arm_cmplx_mult_real_f16.c
arm_cmplx_mult_real_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_cmplx_mult_real_f16.c
  4   * Description:  Floating-point complex by real multiplication
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/complex_math_functions_f16.h"
 30  
 31  #if defined(ARM_FLOAT16_SUPPORTED)
 32  
 33  /**
 34    @ingroup groupCmplxMath
 35   */
 36  
 37  /**
 38    @defgroup CmplxByRealMult Complex-by-Real Multiplication
 39  
 40    Multiplies a complex vector by a real vector and generates a complex result.
 41    The data in the complex arrays is stored in an interleaved fashion
 42    (real, imag, real, imag, ...).
 43    The parameter <code>numSamples</code> represents the number of complex
 44    samples processed.  The complex arrays have a total of <code>2*numSamples</code>
 45    real values while the real array has a total of <code>numSamples</code>
 46    real values.
 47  
 48    The following algorithm is used:
 49  
 50    <pre>
 51    for (n = 0; n < numSamples; n++) {
 52        pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
 53        pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
 54    }
 55    </pre>
 56  
 57    There are separate functions for floating-point, Q15, and Q31 data types.
 58   */
 59  
 60  /**
 61    @addtogroup CmplxByRealMult
 62    @{
 63   */
 64  
 65  /**
 66    @brief         Floating-point complex-by-real multiplication.
 67    @param[in]     pSrcCmplx   points to complex input vector
 68    @param[in]     pSrcReal    points to real input vector
 69    @param[out]    pCmplxDst   points to complex output vector
 70    @param[in]     numSamples  number of complex samples to process (pSrcReal holds numSamples real values)
 71    @return        none
 72   */
 73  
 74  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 75  
 76  void arm_cmplx_mult_real_f16(
 77    const float16_t * pSrcCmplx,
 78    const float16_t * pSrcReal,
 79          float16_t * pCmplxDst,
 80          uint32_t numSamples)
 81  {
 82      static const uint16_t stride_cmplx_x_real_16[8] = {
 83          0, 0, 1, 1, 2, 2, 3, 3
 84          };
 85      uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
 86      uint32_t blkCnt;
 87      f16x8_t rVec;
 88      f16x8_t cmplxVec;
 89      f16x8_t dstVec;
 90      uint16x8_t strideVec;
 91  
 92  
 93      /* stride vector for pairs of real generation */
 94      strideVec = vld1q(stride_cmplx_x_real_16);
 95  
 96      /* Compute 4 complex outputs at a time */
 97      blkCnt = blockSizeC >> 3;
 98      while (blkCnt > 0U) 
 99      {
100          cmplxVec = vld1q(pSrcCmplx);
101          rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
102          dstVec = vmulq(cmplxVec, rVec);
103          vst1q(pCmplxDst, dstVec);
104  
105          pSrcReal += 4;
106          pSrcCmplx += 8;
107          pCmplxDst += 8;
108          blkCnt--;
109      }
110  
111      blkCnt = blockSizeC & 7;
112      if (blkCnt > 0U) {
113          mve_pred16_t p0 = vctp16q(blkCnt);
114  
115          cmplxVec = vld1q(pSrcCmplx);
116          rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
117          dstVec = vmulq(cmplxVec, rVec);
118          vstrhq_p_f16(pCmplxDst, dstVec, p0);
119      }
120  }
121  
122  #else
123  void arm_cmplx_mult_real_f16(
124    const float16_t * pSrcCmplx,
125    const float16_t * pSrcReal,
126          float16_t * pCmplxDst,
127          uint32_t numSamples)
128  {
129          uint32_t blkCnt;                               /* Loop counter */
130          float16_t in;                                  /* Temporary variable */
131  
132  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
133  
134    /* Loop unrolling: Compute 4 outputs at a time */
135    blkCnt = numSamples >> 2U;
136  
137    while (blkCnt > 0U)
138    {
139      /* C[2 * i    ] = A[2 * i    ] * B[i]. */
140      /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
141  
142      in = *pSrcReal++;
143      /* store result in destination buffer. */
144      *pCmplxDst++ = *pSrcCmplx++ * in;
145      *pCmplxDst++ = *pSrcCmplx++ * in;
146  
147      in = *pSrcReal++;
148      *pCmplxDst++ = *pSrcCmplx++ * in;
149      *pCmplxDst++ = *pSrcCmplx++ * in;
150  
151      in = *pSrcReal++;
152      *pCmplxDst++ = *pSrcCmplx++ * in;
153      *pCmplxDst++ = *pSrcCmplx++ * in;
154  
155      in = *pSrcReal++;
156      *pCmplxDst++ = *pSrcCmplx++* in;
157      *pCmplxDst++ = *pSrcCmplx++ * in;
158  
159      /* Decrement loop counter */
160      blkCnt--;
161    }
162  
163    /* Loop unrolling: Compute remaining outputs */
164    blkCnt = numSamples % 0x4U;
165  
166  #else
167  
168    /* Initialize blkCnt with number of samples */
169    blkCnt = numSamples;
170  
171  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
172  
173    while (blkCnt > 0U)
174    {
175      /* C[2 * i    ] = A[2 * i    ] * B[i]. */
176      /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
177  
178      in = *pSrcReal++;
179      /* store result in destination buffer. */
180      *pCmplxDst++ = *pSrcCmplx++ * in;
181      *pCmplxDst++ = *pSrcCmplx++ * in;
182  
183      /* Decrement loop counter */
184      blkCnt--;
185    }
186  
187  }
188  #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
189  
190  /**
191    @} end of CmplxByRealMult group
192   */
193  
194  #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */