/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_scale_f16.c
arm_scale_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_scale_f16.c
  4   * Description:  Multiplies a floating-point vector by a scalar
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions_f16.h"
 30  
 31  /**
 32    @ingroup groupMath
 33   */
 34  
 35  /**
 36    @defgroup BasicScale Vector Scale
 37  
 38    Multiply a vector by a scalar value.  For floating-point data, the algorithm used is:
 39  
 40    <pre>
 41        pDst[n] = pSrc[n] * scale,   0 <= n < blockSize.
 42    </pre>
 43  
 44    In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
 45    a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
 46    The shift allows the gain of the scaling operation to exceed 1.0.
 47    The algorithm used with fixed-point data is:
 48  
 49    <pre>
 50        pDst[n] = (pSrc[n] * scaleFract) << shift,   0 <= n < blockSize.
 51    </pre>
 52  
 53    The overall scale factor applied to the fixed-point data is
 54    <pre>
 55        scale = scaleFract * 2^shift.
 56    </pre>
 57  
 58    The functions support in-place computation allowing the source and destination
 59    pointers to reference the same memory buffer.
 60   */
 61  
 62  /**
 63    @addtogroup BasicScale
 64    @{
 65   */
 66  
 67  /**
 68    @brief         Multiplies a floating-point vector by a scalar.
 69    @param[in]     pSrc       points to the input vector
 70    @param[in]     scale      scale factor to be applied
 71    @param[out]    pDst       points to the output vector
 72    @param[in]     blockSize  number of samples in each vector
 73    @return        none
 74   */
 75  
 76  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 77  
 78  #include "arm_helium_utils.h"
 79  
 80  void arm_scale_f16(
 81    const float16_t * pSrc,
 82          float16_t scale,
 83          float16_t * pDst,
 84          uint32_t blockSize)
 85  {
 86          uint32_t blkCnt;                               /* Loop counter */
 87  
 88      f16x8_t vec1;
 89      f16x8_t res;
 90  
 91      /* Compute 4 outputs at a time */
 92      blkCnt = blockSize >> 3U;
 93  
 94      while (blkCnt > 0U)
 95      {
 96          /* C = A + offset */
 97   
 98          /* Add offset and then store the results in the destination buffer. */
 99          vec1 = vld1q(pSrc);
100          res = vmulq(vec1,scale);
101          vst1q(pDst, res);
102  
103          /* Increment pointers */
104          pSrc += 8;
105          pDst += 8;
106          
107          /* Decrement the loop counter */
108          blkCnt--;
109      }
110  
111      /* Tail */
112      blkCnt = blockSize & 0x7;
113  
114      if (blkCnt > 0U)
115      {
116          mve_pred16_t p0 = vctp16q(blkCnt);
117          vec1 = vld1q((float16_t const *) pSrc);
118          vstrhq_p(pDst, vmulq(vec1, scale), p0);
119      }
120  
121  
122  }
123  
124  #else
125  #if defined(ARM_FLOAT16_SUPPORTED)
126  void arm_scale_f16(
127    const float16_t *pSrc,
128          float16_t scale,
129          float16_t *pDst,
130          uint32_t blockSize)
131  {
132    uint32_t blkCnt;                               /* Loop counter */
133  
134  #if defined (ARM_MATH_LOOPUNROLL)
135  
136    /* Loop unrolling: Compute 4 outputs at a time */
137    blkCnt = blockSize >> 2U;
138  
139    while (blkCnt > 0U)
140    {
141      /* C = A * scale */
142  
143      /* Scale input and store result in destination buffer. */
144      *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
145  
146      *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
147  
148      *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
149  
150      *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
151  
152      /* Decrement loop counter */
153      blkCnt--;
154    }
155  
156    /* Loop unrolling: Compute remaining outputs */
157    blkCnt = blockSize % 0x4U;
158  
159  #else
160  
161    /* Initialize blkCnt with number of samples */
162    blkCnt = blockSize;
163  
164  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
165  
166    while (blkCnt > 0U)
167    {
168      /* C = A * scale */
169  
170      /* Scale input and store result in destination buffer. */
171      *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
172  
173      /* Decrement loop counter */
174      blkCnt--;
175    }
176  
177  }
178  #endif
 179  #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
180  
181  /**
182    @} end of BasicScale group
183   */