Cradicle Explorer

/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_scale_f32.c
arm_scale_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_scale_f32.c
  4   * Description:  Multiplies a floating-point vector by a scalar
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions.h"
 30  
 31  /**
 32    @ingroup groupMath
 33   */
 34  
 35  /**
 36    @defgroup BasicScale Vector Scale
 37  
 38    Multiply a vector by a scalar value.  For floating-point data, the algorithm used is:
 39  
 40    <pre>
 41        pDst[n] = pSrc[n] * scale,   0 <= n < blockSize.
 42    </pre>
 43  
 44    In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
 45    a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
 46    The shift allows the gain of the scaling operation to exceed 1.0.
 47    The algorithm used with fixed-point data is:
 48  
 49    <pre>
 50        pDst[n] = (pSrc[n] * scaleFract) << shift,   0 <= n < blockSize.
 51    </pre>
 52  
 53    The overall scale factor applied to the fixed-point data is
 54    <pre>
 55        scale = scaleFract * 2^shift.
 56    </pre>
 57  
 58    The functions support in-place computation allowing the source and destination
 59    pointers to reference the same memory buffer.
 60   */
 61  
 62  /**
 63    @addtogroup BasicScale
 64    @{
 65   */
 66  
 67  /**
 68    @brief         Multiplies a floating-point vector by a scalar.
 69    @param[in]     pSrc       points to the input vector
 70    @param[in]     scale      scale factor to be applied
 71    @param[out]    pDst       points to the output vector
 72    @param[in]     blockSize  number of samples in each vector
 73    @return        none
 74   */
 75  
 76  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 77  
 78  #include "arm_helium_utils.h"
 79  
 80  void arm_scale_f32(
 81    const float32_t * pSrc,
 82          float32_t scale,
 83          float32_t * pDst,
 84          uint32_t blockSize)
 85  {
 86          uint32_t blkCnt;                               /* Loop counter */
 87  
 88      f32x4_t vec1;
 89      f32x4_t res;
 90  
 91      /* Compute 4 outputs at a time */
 92      blkCnt = blockSize >> 2U;
 93  
 94      while (blkCnt > 0U)
 95      {
 96          /* C = A + offset */
 97   
 98          /* Add offset and then store the results in the destination buffer. */
 99          vec1 = vld1q(pSrc);
100          res = vmulq(vec1,scale);
101          vst1q(pDst, res);
102  
103          /* Increment pointers */
104          pSrc += 4;
105          pDst += 4;
106          
107          /* Decrement the loop counter */
108          blkCnt--;
109      }
110  
111      /* Tail */
112      blkCnt = blockSize & 0x3;
113  
114      if (blkCnt > 0U)
115      {
116          mve_pred16_t p0 = vctp32q(blkCnt);
117          vec1 = vld1q((float32_t const *) pSrc);
118          vstrwq_p(pDst, vmulq(vec1, scale), p0);
119      }
120  
121  
122  }
123  
124  #else
125  void arm_scale_f32(
126    const float32_t *pSrc,
127          float32_t scale,
128          float32_t *pDst,
129          uint32_t blockSize)
130  {
131    uint32_t blkCnt;                               /* Loop counter */
132  #if defined(ARM_MATH_NEON_EXPERIMENTAL)
133      f32x4_t vec1;
134      f32x4_t res;
135  
136      /* Compute 4 outputs at a time */
137      blkCnt = blockSize >> 2U;
138  
139      while (blkCnt > 0U)
140      {
141          /* C = A * scale */
142  
143      	/* Scale the input and then store the results in the destination buffer. */
144          vec1 = vld1q_f32(pSrc);
145          res = vmulq_f32(vec1, vdupq_n_f32(scale));
146          vst1q_f32(pDst, res);
147  
148          /* Increment pointers */
149          pSrc += 4; 
150          pDst += 4;
151          
152          /* Decrement the loop counter */
153          blkCnt--;
154      }
155  
156      /* Tail */
157      blkCnt = blockSize & 0x3;
158  
159  #else
160  #if defined (ARM_MATH_LOOPUNROLL)
161  
162    /* Loop unrolling: Compute 4 outputs at a time */
163    blkCnt = blockSize >> 2U;
164  
165    while (blkCnt > 0U)
166    {
167      float32_t in1, in2, in3, in4;
168  
169      /* C = A * scale */
170  
171      /* Scale input and store result in destination buffer. */
172      in1 = (*pSrc++) * scale;
173  
174      in2 = (*pSrc++) * scale;
175  
176      in3 = (*pSrc++) * scale;
177  
178      in4 = (*pSrc++) * scale;
179  
180      *pDst++ = in1;
181      *pDst++ = in2;
182      *pDst++ = in3;
183      *pDst++ = in4;
184  
185      /* Decrement loop counter */
186      blkCnt--;
187    }
188  
189    /* Loop unrolling: Compute remaining outputs */
190    blkCnt = blockSize % 0x4U;
191  
192  #else
193  
194    /* Initialize blkCnt with number of samples */
195    blkCnt = blockSize;
196  
197  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
198  #endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
199  
200    while (blkCnt > 0U)
201    {
202      /* C = A * scale */
203  
204      /* Scale input and store result in destination buffer. */
205      *pDst++ = (*pSrc++) * scale;
206  
207      /* Decrement loop counter */
208      blkCnt--;
209    }
210  
211  }
212  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
213  
214  /**
215    @} end of BasicScale group
216   */