/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_scale_q31.c
arm_scale_q31.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_scale_q31.c
  4   * Description:  Multiplies a Q31 vector by a scalar
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions.h"
 30  
 31  /**
 32    @ingroup groupMath
 33   */
 34  
 35  /**
 36    @addtogroup BasicScale
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Multiplies a Q31 vector by a scalar.
 42    @param[in]     pSrc       points to the input vector
 43    @param[in]     scaleFract fractional portion of the scale value
 44    @param[in]     shift      number of bits to shift the result by
 45    @param[out]    pDst       points to the output vector
 46    @param[in]     blockSize  number of samples in each vector
 47    @return        none
 48  
 49    @par           Scaling and Overflow Behavior
 50                     The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
 51                     These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
 52   */
 53  
 54  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 55  
 56  #include "arm_helium_utils.h"
 57  
 58  void arm_scale_q31(
 59      const q31_t * pSrc,
 60      q31_t   scaleFract,
 61      int8_t  shift,
 62      q31_t * pDst,
 63      uint32_t blockSize)
 64  {
 65      uint32_t  blkCnt;           /* loop counters */
 66      q31x4_t vecSrc;
 67      q31x4_t vecDst;
 68  
 69      /* Compute 4 outputs at a time */
 70      blkCnt = blockSize >> 2;
 71      while (blkCnt > 0U)
 72      {
 73          /*
 74           * C = A * scale
 75           * Scale the input and then store the result in the destination buffer.
 76           */
 77          vecSrc = vld1q(pSrc);
 78          vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
 79          vecDst = vqshlq_r(vecDst, shift + 1);
 80          vst1q(pDst, vecDst);
 81          /*
 82           * Decrement the blockSize loop counter
 83           */
 84          blkCnt--;
 85          /*
 86           * advance vector source and destination pointers
 87           */
 88          pSrc += 4;
 89          pDst += 4;
 90      }
 91      /*
 92       * tail
 93       */
 94      blkCnt = blockSize & 3;
 95      if (blkCnt > 0U)
 96      {
 97          mve_pred16_t p0 = vctp32q(blkCnt);
 98          vecSrc = vld1q(pSrc);
 99          vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
100          vecDst = vqshlq_r(vecDst, shift + 1);
101          vstrwq_p(pDst, vecDst, p0);
102      }
103  }
104  
105  #else
106  void arm_scale_q31(
107    const q31_t *pSrc,
108          q31_t scaleFract,
109          int8_t shift,
110          q31_t *pDst,
111          uint32_t blockSize)
112  {
113          uint32_t blkCnt;                               /* Loop counter */
114          q31_t in, out;                                 /* Temporary variables */
115          int8_t kShift = shift + 1;                     /* Shift to apply after scaling */
116          int8_t sign = (kShift & 0x80);
117  
118  #if defined (ARM_MATH_LOOPUNROLL)
119  
120    /* Loop unrolling: Compute 4 outputs at a time */
121    blkCnt = blockSize >> 2U;
122  
123    if (sign == 0U)
124    {
125      while (blkCnt > 0U)
126      {
127        /* C = A * scale */
128  
129        /* Scale input and store result in destination buffer. */
130        in = *pSrc++;                                /* read input from source */
131        in = ((q63_t) in * scaleFract) >> 32;        /* multiply input with scaler value */
132        out = in << kShift;                          /* apply shifting */
133        if (in != (out >> kShift))                   /* saturate the result */
134          out = 0x7FFFFFFF ^ (in >> 31);
135        *pDst++ = out;                               /* Store result destination */
136  
137        in = *pSrc++;
138        in = ((q63_t) in * scaleFract) >> 32;
139        out = in << kShift;
140        if (in != (out >> kShift))
141          out = 0x7FFFFFFF ^ (in >> 31);
142        *pDst++ = out;
143  
144        in = *pSrc++;
145        in = ((q63_t) in * scaleFract) >> 32;
146        out = in << kShift;
147        if (in != (out >> kShift))
148          out = 0x7FFFFFFF ^ (in >> 31);
149        *pDst++ = out;
150  
151        in = *pSrc++;
152        in = ((q63_t) in * scaleFract) >> 32;
153        out = in << kShift;
154        if (in != (out >> kShift))
155          out = 0x7FFFFFFF ^ (in >> 31);
156        *pDst++ = out;
157  
158        /* Decrement loop counter */
159        blkCnt--;
160      }
161    }
162    else
163    {
164      while (blkCnt > 0U)
165      {
166        /* C = A * scale */
167  
168        /* Scale input and store result in destination buffer. */
169        in = *pSrc++;                                /* read four inputs from source */
170        in = ((q63_t) in * scaleFract) >> 32;        /* multiply input with scaler value */
171        out = in >> -kShift;                         /* apply shifting */
172        *pDst++ = out;                               /* Store result destination */
173  
174        in = *pSrc++;
175        in = ((q63_t) in * scaleFract) >> 32;
176        out = in >> -kShift;
177        *pDst++ = out;
178  
179        in = *pSrc++;
180        in = ((q63_t) in * scaleFract) >> 32;
181        out = in >> -kShift;
182        *pDst++ = out;
183  
184        in = *pSrc++;
185        in = ((q63_t) in * scaleFract) >> 32;
186        out = in >> -kShift;
187        *pDst++ = out;
188  
189        /* Decrement loop counter */
190        blkCnt--;
191      }
192    }
193  
194    /* Loop unrolling: Compute remaining outputs */
195    blkCnt = blockSize % 0x4U;
196  
197  #else
198  
199    /* Initialize blkCnt with number of samples */
200    blkCnt = blockSize;
201  
202  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
203  
204    if (sign == 0U)
205    {
206      while (blkCnt > 0U)
207      {
208        /* C = A * scale */
209  
210        /* Scale input and store result in destination buffer. */
211        in = *pSrc++;
212        in = ((q63_t) in * scaleFract) >> 32;
213        out = in << kShift;
214        if (in != (out >> kShift))
215            out = 0x7FFFFFFF ^ (in >> 31);
216        *pDst++ = out;
217  
218        /* Decrement loop counter */
219        blkCnt--;
220      }
221    }
222    else
223    {
224      while (blkCnt > 0U)
225      {
226        /* C = A * scale */
227  
228        /* Scale input and store result in destination buffer. */
229        in = *pSrc++;
230        in = ((q63_t) in * scaleFract) >> 32;
231        out = in >> -kShift;
232        *pDst++ = out;
233  
234        /* Decrement loop counter */
235        blkCnt--;
236      }
237    }
238  
239  }
240  #endif /* defined(ARM_MATH_MVEI) */
241  
242  /**
243    @} end of BasicScale group
244   */