Cradicle Explorer

/ Drivers / CMSIS / DSP / Source / SupportFunctions / arm_float_to_q7.c
arm_float_to_q7.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_float_to_q7.c
  4   * Description:  Converts the elements of the floating-point vector to Q7 vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/support_functions.h"
 30  
 31  /**
 32    @ingroup groupSupport
 33   */
 34  
 35  /**
 36    @addtogroup float_to_x
 37    @{
 38   */
 39  
 40  /**
 41   * @brief Converts the elements of the floating-point vector to Q7 vector.
 42   * @param[in]       *pSrc points to the floating-point input vector
 43   * @param[out]      *pDst points to the Q7 output vector
 44   * @param[in]       blockSize length of the input vector
 45   * @return none.
 46   *
 47   *\par Description:
 48   * \par
 49   * The equation used for the conversion process is:
 50   * <pre>
 51   * 	pDst[n] = (q7_t)(pSrc[n] * 128);   0 <= n < blockSize.
 52   * </pre>
 53   * \par Scaling and Overflow Behavior:
 54   * \par
 55   * The function uses saturating arithmetic.
 56   * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
 57   * \note
 58   * In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
 59   * defined in the preprocessor section of project options.
 60   */
 61  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 62  void arm_float_to_q7(
 63    const float32_t * pSrc,
 64    q7_t * pDst,
 65    uint32_t blockSize)
 66  {
 67      uint32_t         blkCnt;     /* loop counters */
 68      float32_t       maxQ = powf(2.0, 7);
 69      f32x4x4_t       tmp;
 70      q15x8_t         evVec, oddVec;
 71      q7x16_t         vecDst;
 72      float32_t const *pSrcVec;
 73  #ifdef ARM_MATH_ROUNDING
 74      float32_t in;
 75  #endif
 76  
 77      pSrcVec = (float32_t const *) pSrc;
 78      blkCnt = blockSize >> 4;
 79      while (blkCnt > 0U) {
 80          tmp = vld4q(pSrcVec);
 81          pSrcVec += 16;
 82          /*
 83           * C = A * 128.0
 84           * convert from float to q7 and then store the results in the destination buffer
 85           */
 86          tmp.val[0] = vmulq(tmp.val[0], maxQ);
 87          tmp.val[1] = vmulq(tmp.val[1], maxQ);
 88          tmp.val[2] = vmulq(tmp.val[2], maxQ);
 89          tmp.val[3] = vmulq(tmp.val[3], maxQ);
 90  
 91          /*
 92           * convert and pack evens
 93           */
 94          evVec = vqmovnbq(evVec, vcvtaq_s32_f32(tmp.val[0]));
 95          evVec = vqmovntq(evVec, vcvtaq_s32_f32(tmp.val[2]));
 96          /*
 97           * convert and pack odds
 98           */
 99          oddVec = vqmovnbq(oddVec, vcvtaq_s32_f32(tmp.val[1]));
100          oddVec = vqmovntq(oddVec, vcvtaq_s32_f32(tmp.val[3]));
101          /*
102           * merge
103           */
104          vecDst = vqmovnbq(vecDst, evVec);
105          vecDst = vqmovntq(vecDst, oddVec);
106  
107          vst1q(pDst, vecDst);
108          pDst += 16;
109          /*
110           * Decrement the blockSize loop counter
111           */
112          blkCnt--;
113      }
114  
115    blkCnt = blockSize & 0xF;
116    while (blkCnt > 0U)
117    {
118      /* C = A * 128 */
119  
120      /* Convert from float to q7 and store result in destination buffer */
121  #ifdef ARM_MATH_ROUNDING
122  
123      in = (*pSrcVec++ * 128);
124      in += in > 0.0f ? 0.5f : -0.5f;
125      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
126  
127  #else
128  
129      *pDst++ = (q7_t) __SSAT((q31_t) (*pSrcVec++ * 128.0f), 8);
130  
131  #endif /* #ifdef ARM_MATH_ROUNDING */
132  
133      /* Decrement loop counter */
134      blkCnt--;
135    }
136  
137  }
138  #else
139  #if defined(ARM_MATH_NEON)
140  void arm_float_to_q7(
141    const float32_t * pSrc,
142    q7_t * pDst,
143    uint32_t blockSize)
144  {
145    const float32_t *pIn = pSrc;                         /* Src pointer */
146    uint32_t blkCnt;                               /* loop counter */
147  
148    float32x4_t inV;
149    #ifdef ARM_MATH_ROUNDING
150    float32_t in;
151    float32x4_t zeroV = vdupq_n_f32(0.0f);
152    float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
153    float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
154    float32x4_t r;
155    uint32x4_t cmp;
156    #endif
157  
158    int16x4_t cvt1,cvt2;
159    int8x8_t outV;
160  
161    blkCnt = blockSize >> 3U;
162  
163    /* Compute 8 outputs at a time.
164     ** a second loop below computes the remaining 1 to 7 samples. */
165    while (blkCnt > 0U)
166    {
167  
168  #ifdef ARM_MATH_ROUNDING
169      /* C = A * 128 */
170      /* Convert from float to q7 and then store the results in the destination buffer */
171      inV = vld1q_f32(pIn);
172      cmp = vcgtq_f32(inV,zeroV);
173      r = vbslq_f32(cmp,pHalf,mHalf);
174      inV = vaddq_f32(inV, r);
175      cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
176      pIn += 4;
177  
178      inV = vld1q_f32(pIn);
179      cmp = vcgtq_f32(inV,zeroV);
180      r = vbslq_f32(cmp,pHalf,mHalf);
181      inV = vaddq_f32(inV, r);
182      cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
183      pIn += 4;
184      
185      outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
186      vst1_s8(pDst, outV);
187      pDst += 8;
188  
189  #else
190  
191      /* C = A * 128 */
192      /* Convert from float to q7 and then store the results in the destination buffer */
193      inV = vld1q_f32(pIn);
194      cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
195      pIn += 4;
196  
197      inV = vld1q_f32(pIn);
198      cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
199      pIn += 4;
200  
201      outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
202  
203      vst1_s8(pDst, outV);
204      pDst += 8;
205  #endif /*      #ifdef ARM_MATH_ROUNDING        */
206  
207      /* Decrement the loop counter */
208      blkCnt--;
209    }
210  
211    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
212     ** No loop unrolling is used. */
213    blkCnt = blockSize & 7;
214  
215    while (blkCnt > 0U)
216    {
217  
218  #ifdef ARM_MATH_ROUNDING
219      /* C = A * 128 */
220      /* Convert from float to q7 and then store the results in the destination buffer */
221      in = *pIn++;
222      in = (in * 128);
223      in += in > 0.0f ? 0.5f : -0.5f;
224      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
225  
226  #else
227  
228      /* C = A * 128 */
229      /* Convert from float to q7 and then store the results in the destination buffer */
230      *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
231  
232  #endif /*      #ifdef ARM_MATH_ROUNDING        */
233  
234      /* Decrement the loop counter */
235      blkCnt--;
236    }
237  
238  }
239  #else
240  void arm_float_to_q7(
241    const float32_t * pSrc,
242          q7_t * pDst,
243          uint32_t blockSize)
244  {
245          uint32_t blkCnt;                               /* Loop counter */
246    const float32_t *pIn = pSrc;                         /* Source pointer */
247  
248  #ifdef ARM_MATH_ROUNDING
249          float32_t in;
250  #endif /* #ifdef ARM_MATH_ROUNDING */
251  
252  #if defined (ARM_MATH_LOOPUNROLL)
253  
254    /* Loop unrolling: Compute 4 outputs at a time */
255    blkCnt = blockSize >> 2U;
256  
257    while (blkCnt > 0U)
258    {
259      /* C = A * 128 */
260  
261      /* Convert from float to q7 and store result in destination buffer */
262  #ifdef ARM_MATH_ROUNDING
263  
264      in = (*pIn++ * 128);
265      in += in > 0.0f ? 0.5f : -0.5f;
266      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
267  
268      in = (*pIn++ * 128);
269      in += in > 0.0f ? 0.5f : -0.5f;
270      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
271  
272      in = (*pIn++ * 128);
273      in += in > 0.0f ? 0.5f : -0.5f;
274      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
275  
276      in = (*pIn++ * 128);
277      in += in > 0.0f ? 0.5f : -0.5f;
278      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
279  
280  #else
281  
282      *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
283      *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
284      *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
285      *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
286  
287  #endif /* #ifdef ARM_MATH_ROUNDING */
288  
289      /* Decrement loop counter */
290      blkCnt--;
291    }
292  
293    /* Loop unrolling: Compute remaining outputs */
294    blkCnt = blockSize % 0x4U;
295  
296  #else
297  
298    /* Initialize blkCnt with number of samples */
299    blkCnt = blockSize;
300  
301  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
302  
303    while (blkCnt > 0U)
304    {
305      /* C = A * 128 */
306  
307      /* Convert from float to q7 and store result in destination buffer */
308  #ifdef ARM_MATH_ROUNDING
309  
310      in = (*pIn++ * 128);
311      in += in > 0.0f ? 0.5f : -0.5f;
312      *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
313  
314  #else
315  
316      *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
317  
318  #endif /* #ifdef ARM_MATH_ROUNDING */
319  
320      /* Decrement loop counter */
321      blkCnt--;
322    }
323  
324  }
325  #endif /* #if defined(ARM_MATH_NEON) */
326  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
327  
328  /**
329    @} end of float_to_x group
330   */