arm_cmplx_mult_real_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_cmplx_mult_real_f16.c 4 * Description: Floating-point complex by real multiplication 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/complex_math_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 /** 34 @ingroup groupCmplxMath 35 */ 36 37 /** 38 @defgroup CmplxByRealMult Complex-by-Real Multiplication 39 40 Multiplies a complex vector by a real vector and generates a complex result. 41 The data in the complex arrays is stored in an interleaved fashion 42 (real, imag, real, imag, ...). 43 The parameter <code>numSamples</code> represents the number of complex 44 samples processed. The complex arrays have a total of <code>2*numSamples</code> 45 real values while the real array has a total of <code>numSamples</code> 46 real values. 47 48 The underlying algorithm is used: 49 50 <pre> 51 for (n = 0; n < numSamples; n++) { 52 pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n]; 53 pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n]; 54 } 55 </pre> 56 57 There are separate functions for floating-point, Q15, and Q31 data types. 58 */ 59 60 /** 61 @addtogroup CmplxByRealMult 62 @{ 63 */ 64 65 /** 66 @brief Floating-point complex-by-real multiplication. 67 @param[in] pSrcCmplx points to complex input vector 68 @param[in] pSrcReal points to real input vector 69 @param[out] pCmplxDst points to complex output vector 70 @param[in] numSamples number of samples in each vector 71 @return none 72 */ 73 74 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 75 76 void arm_cmplx_mult_real_f16( 77 const float16_t * pSrcCmplx, 78 const float16_t * pSrcReal, 79 float16_t * pCmplxDst, 80 uint32_t numSamples) 81 { 82 static const uint16_t stride_cmplx_x_real_16[8] = { 83 0, 0, 1, 1, 2, 2, 3, 3 84 }; 85 uint32_t blockSizeC = numSamples * CMPLX_DIM; /* loop counters */ 86 uint32_t blkCnt; 87 f16x8_t rVec; 88 f16x8_t cmplxVec; 89 f16x8_t dstVec; 90 uint16x8_t strideVec; 91 92 93 /* stride vector for pairs of real generation */ 94 strideVec = vld1q(stride_cmplx_x_real_16); 95 96 /* Compute 4 complex outputs at a time */ 97 blkCnt = blockSizeC >> 3; 98 while (blkCnt > 0U) 99 { 100 cmplxVec = vld1q(pSrcCmplx); 101 rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec); 102 dstVec = vmulq(cmplxVec, rVec); 103 vst1q(pCmplxDst, dstVec); 104 105 pSrcReal += 4; 106 pSrcCmplx += 8; 107 pCmplxDst += 8; 108 blkCnt--; 109 } 110 111 blkCnt = blockSizeC & 7; 112 if (blkCnt > 0U) { 113 mve_pred16_t p0 = vctp16q(blkCnt); 114 115 cmplxVec = vld1q(pSrcCmplx); 116 rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec); 117 dstVec = vmulq(cmplxVec, rVec); 118 vstrhq_p_f16(pCmplxDst, dstVec, p0); 119 } 120 } 121 122 #else 123 void arm_cmplx_mult_real_f16( 124 const float16_t * pSrcCmplx, 125 const float16_t * pSrcReal, 126 float16_t * pCmplxDst, 127 uint32_t numSamples) 128 { 129 uint32_t blkCnt; /* Loop counter */ 130 float16_t in; /* Temporary variable */ 131 132 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 133 134 /* Loop unrolling: Compute 4 outputs at a time */ 135 blkCnt = numSamples >> 2U; 136 137 while (blkCnt > 0U) 138 { 139 /* C[2 * i ] = A[2 * i ] * B[i]. */ 140 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 141 142 in = *pSrcReal++; 143 /* store result in destination buffer. */ 144 *pCmplxDst++ = *pSrcCmplx++ * in; 145 *pCmplxDst++ = *pSrcCmplx++ * in; 146 147 in = *pSrcReal++; 148 *pCmplxDst++ = *pSrcCmplx++ * in; 149 *pCmplxDst++ = *pSrcCmplx++ * in; 150 151 in = *pSrcReal++; 152 *pCmplxDst++ = *pSrcCmplx++ * in; 153 *pCmplxDst++ = *pSrcCmplx++ * in; 154 155 in = *pSrcReal++; 156 *pCmplxDst++ = *pSrcCmplx++* in; 157 *pCmplxDst++ = *pSrcCmplx++ * in; 158 159 /* Decrement loop counter */ 160 blkCnt--; 161 } 162 163 /* Loop unrolling: Compute remaining outputs */ 164 blkCnt = numSamples % 0x4U; 165 166 #else 167 168 /* Initialize blkCnt with number of samples */ 169 blkCnt = numSamples; 170 171 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 172 173 while (blkCnt > 0U) 174 { 175 /* C[2 * i ] = A[2 * i ] * B[i]. */ 176 /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ 177 178 in = *pSrcReal++; 179 /* store result in destination buffer. */ 180 *pCmplxDst++ = *pSrcCmplx++ * in; 181 *pCmplxDst++ = *pSrcCmplx++ * in; 182 183 /* Decrement loop counter */ 184 blkCnt--; 185 } 186 187 } 188 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 189 190 /** 191 @} end of CmplxByRealMult group 192 */ 193 194 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */