arm_f16_to_q15.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_float_to_q15.c 4 * Description: Converts the elements of the floating-point vector to Q15 vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/support_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 34 /** 35 @ingroup groupSupport 36 */ 37 38 /** 39 @addtogroup f16_to_x 40 @{ 41 */ 42 43 /** 44 @brief Converts the elements of the f16 vector to Q15 vector. 45 @param[in] pSrc points to the f16 input vector 46 @param[out] pDst points to the Q15 output vector 47 @param[in] blockSize number of samples in each vector 48 @return none 49 50 @par Details 51 The equation used for the conversion process is: 52 <pre> 53 pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize. 54 </pre> 55 56 @par Scaling and Overflow Behavior 57 The function uses saturating arithmetic. 58 Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. 59 60 @note 61 In order to apply rounding in scalar version, the library should be rebuilt with the ROUNDING macro 62 defined in the preprocessor section of project options. 63 */ 64 65 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 66 67 void arm_f16_to_q15( 68 const float16_t * pSrc, 69 q15_t * pDst, 70 uint32_t blockSize) 71 { 72 float16_t maxQ = (float16_t) Q15_MAX; 73 float16x8_t vecDst; 74 75 76 do { 77 mve_pred16_t p = vctp16q(blockSize); 78 79 vecDst = vldrhq_z_f16((float16_t const *) pSrc, p); 80 /* C = A * 32767 */ 81 /* convert from float to Q15 and then store the results in the destination buffer */ 82 vecDst = vmulq_m(vuninitializedq_f16(), vecDst, maxQ, p); 83 84 vstrhq_p_s16(pDst, 85 vcvtaq_m(vuninitializedq_s16(), vecDst, p), p); 86 /* 87 * Decrement the blockSize loop counter 88 * Advance vector source and destination pointers 89 */ 90 pSrc += 8; 91 pDst += 8; 92 blockSize -= 8; 93 } 94 while ((int32_t) blockSize > 0); 95 } 96 97 #else 98 99 void arm_f16_to_q15( 100 const float16_t * pSrc, 101 q15_t * pDst, 102 uint32_t blockSize) 103 { 104 const float16_t *pIn = pSrc; /* Src pointer */ 105 uint32_t blkCnt; /* loop counter */ 106 #ifdef ARM_MATH_ROUNDING 107 float16_t in; 108 #endif /* #ifdef ARM_MATH_ROUNDING */ 109 110 /* 111 * Loop over blockSize number of values 112 */ 113 blkCnt = blockSize; 114 115 while (blkCnt > 0U) 116 { 117 118 #ifdef ARM_MATH_ROUNDING 119 120 /* 121 * C = A * 65536 122 */ 123 /* 124 * convert from float to Q31 and then store the results in the destination buffer 125 */ 126 in = *pIn++; 127 in = (in * 32768.0); 128 in += in > 0.0 ? 0.5 : -0.5; 129 *pDst++ = clip_q31_to_q15((q31_t) (in)); 130 131 #else 132 133 /* 134 * C = A * 32768 135 */ 136 /* 137 * convert from float to Q31 and then store the results in the destination buffer 138 */ 139 *pDst++ = clip_q31_to_q15((q31_t) ((_Float16)*pIn++ * 32768.0f16)); 140 141 #endif /* #ifdef ARM_MATH_ROUNDING */ 142 143 /* 144 * Decrement the loop counter 145 */ 146 blkCnt--; 147 } 148 149 } 150 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 151 152 /** 153 @} end of f16_to_x group 154 */ 155 156 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 157