arm_abs_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_abs_f16.c 4 * Description: Floating-point vector absolute value 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions_f16.h" 30 #include <math.h> 31 32 /** 33 @ingroup groupMath 34 */ 35 36 /** 37 @defgroup BasicAbs Vector Absolute Value 38 39 Computes the absolute value of a vector on an element-by-element basis. 40 41 <pre> 42 pDst[n] = abs(pSrc[n]), 0 <= n < blockSize. 43 </pre> 44 45 The functions support in-place computation allowing the source and 46 destination pointers to reference the same memory buffer. 47 There are separate functions for floating-point, Q7, Q15, and Q31 data types. 48 */ 49 50 /** 51 @addtogroup BasicAbs 52 @{ 53 */ 54 55 /** 56 @brief Floating-point vector absolute value. 57 @param[in] pSrc points to the input vector 58 @param[out] pDst points to the output vector 59 @param[in] blockSize number of samples in each vector 60 @return none 61 */ 62 63 64 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 65 66 #include "arm_helium_utils.h" 67 68 void arm_abs_f16( 69 const float16_t * pSrc, 70 float16_t * pDst, 71 uint32_t blockSize) 72 { 73 uint32_t blkCnt; /* Loop counter */ 74 f16x8_t vec1; 75 f16x8_t res; 76 77 78 /* Compute 4 outputs at a time */ 79 blkCnt = blockSize >> 3U; 80 81 while (blkCnt > 0U) 82 { 83 /* C = |A| */ 84 85 /* Calculate absolute values and then store the results in the destination buffer. */ 86 vec1 = vld1q(pSrc); 87 res = vabsq(vec1); 88 vst1q(pDst, res); 89 90 /* Increment pointers */ 91 pSrc += 8; 92 pDst += 8; 93 94 /* Decrement the loop counter */ 95 blkCnt--; 96 } 97 98 /* Tail */ 99 blkCnt = blockSize & 0x7; 100 101 102 if (blkCnt > 0U) 103 { 104 /* C = |A| */ 105 mve_pred16_t p0 = vctp16q(blkCnt); 106 vec1 = vld1q(pSrc); 107 vstrhq_p(pDst, vabsq(vec1), p0); 108 } 109 110 } 111 112 #else 113 #if defined(ARM_FLOAT16_SUPPORTED) 114 void arm_abs_f16( 115 const float16_t * pSrc, 116 float16_t * pDst, 117 uint32_t blockSize) 118 { 119 uint32_t blkCnt; /* Loop counter */ 120 121 #if defined(ARM_MATH_NEON_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 122 f16x8_t vec1; 123 f16x8_t res; 124 125 /* Compute 4 outputs at a time */ 126 blkCnt = blockSize >> 2U; 127 128 while (blkCnt > 0U) 129 { 130 /* C = |A| */ 131 132 /* Calculate absolute values and then store the results in the destination buffer. */ 133 vec1 = vld1q_f16(pSrc); 134 res = vabsq_f16(vec1); 135 vst1q_f16(pDst, res); 136 137 /* Increment pointers */ 138 pSrc += 4; 139 pDst += 4; 140 141 /* Decrement the loop counter */ 142 blkCnt--; 143 } 144 145 /* Tail */ 146 blkCnt = blockSize & 0x3; 147 148 #else 149 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 150 151 /* Loop unrolling: Compute 4 outputs at a time */ 152 blkCnt = blockSize >> 2U; 153 154 while (blkCnt > 0U) 155 { 156 /* C = |A| */ 157 158 /* Calculate absolute and store result in destination buffer. */ 159 *pDst++ = (_Float16)fabsf((float32_t)*pSrc++); 160 161 *pDst++ = (_Float16)fabsf((float32_t)*pSrc++); 162 163 *pDst++ = (_Float16)fabsf((float32_t)*pSrc++); 164 165 *pDst++ = (_Float16)fabsf((float32_t)*pSrc++); 166 167 /* Decrement loop counter */ 168 blkCnt--; 169 } 170 171 /* Loop unrolling: Compute remaining outputs */ 172 blkCnt = blockSize % 0x4U; 173 174 #else 175 176 /* Initialize blkCnt with number of samples */ 177 blkCnt = blockSize; 178 179 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 180 #endif /* #if defined(ARM_MATH_NEON) */ 181 182 while (blkCnt > 0U) 183 { 184 /* C = |A| */ 185 186 /* Calculate absolute and store result in destination buffer. */ 187 *pDst++ = (_Float16)fabsf((float32_t)*pSrc++); 188 189 /* Decrement loop counter */ 190 blkCnt--; 191 } 192 193 } 194 #endif /* defined(ARM_FLOAT16_SUPPORTED */ 195 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 196 /** 197 @} end of BasicAbs group 198 */