/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_abs_f16.c
arm_abs_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_abs_f16.c
  4   * Description:  Floating-point vector absolute value
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions_f16.h"
 30  #include <math.h>
 31  
 32  /**
 33    @ingroup groupMath
 34   */
 35  
 36  /**
 37    @defgroup BasicAbs Vector Absolute Value
 38  
 39    Computes the absolute value of a vector on an element-by-element basis.
 40  
 41    <pre>
 42        pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
 43    </pre>
 44  
 45    The functions support in-place computation allowing the source and
 46    destination pointers to reference the same memory buffer.
 47    There are separate functions for floating-point, Q7, Q15, and Q31 data types.
 48   */
 49  
 50  /**
 51    @addtogroup BasicAbs
 52    @{
 53   */
 54  
 55  /**
 56    @brief         Floating-point vector absolute value.
 57    @param[in]     pSrc       points to the input vector
 58    @param[out]    pDst       points to the output vector
 59    @param[in]     blockSize  number of samples in each vector
 60    @return        none
 61   */
 62  
 63  
 64  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 65  
 66  #include "arm_helium_utils.h"
 67  
 68  void arm_abs_f16(
 69    const float16_t * pSrc,
 70          float16_t * pDst,
 71          uint32_t blockSize)
 72  {
 73      uint32_t blkCnt;                               /* Loop counter */
 74      f16x8_t vec1;
 75      f16x8_t res;
 76  
 77  
 78      /* Compute 4 outputs at a time */
 79      blkCnt = blockSize >> 3U;
 80  
 81      while (blkCnt > 0U)
 82      {
 83          /* C = |A| */
 84  
 85          /* Calculate absolute values and then store the results in the destination buffer. */
 86          vec1 = vld1q(pSrc);
 87          res = vabsq(vec1);
 88          vst1q(pDst, res);
 89  
 90          /* Increment pointers */
 91          pSrc += 8;
 92          pDst += 8;
 93          
 94          /* Decrement the loop counter */
 95          blkCnt--;
 96      }
 97  
 98      /* Tail */
 99      blkCnt = blockSize & 0x7;
100  
101  
102      if (blkCnt > 0U)
103      {
104        /* C = |A| */
105        mve_pred16_t p0 = vctp16q(blkCnt);
106        vec1 = vld1q(pSrc);
107        vstrhq_p(pDst, vabsq(vec1), p0);
108      }
109  
110  }
111  
112  #else
113  #if defined(ARM_FLOAT16_SUPPORTED)
114  void arm_abs_f16(
115    const float16_t * pSrc,
116          float16_t * pDst,
117          uint32_t blockSize)
118  {
119          uint32_t blkCnt;                               /* Loop counter */
120  
121  #if defined(ARM_MATH_NEON_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
122      f16x8_t vec1;
123      f16x8_t res;
124  
125      /* Compute 4 outputs at a time */
126      blkCnt = blockSize >> 2U;
127  
128      while (blkCnt > 0U)
129      {
130          /* C = |A| */
131  
132      	/* Calculate absolute values and then store the results in the destination buffer. */
133          vec1 = vld1q_f16(pSrc);
134          res = vabsq_f16(vec1);
135          vst1q_f16(pDst, res);
136  
137          /* Increment pointers */
138          pSrc += 4;
139          pDst += 4;
140          
141          /* Decrement the loop counter */
142          blkCnt--;
143      }
144  
145      /* Tail */
146      blkCnt = blockSize & 0x3;
147  
148  #else
149  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
150  
151    /* Loop unrolling: Compute 4 outputs at a time */
152    blkCnt = blockSize >> 2U;
153  
154    while (blkCnt > 0U)
155    {
156      /* C = |A| */
157  
158      /* Calculate absolute and store result in destination buffer. */
159      *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
160  
161      *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
162  
163      *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
164  
165      *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
166  
167      /* Decrement loop counter */
168      blkCnt--;
169    }
170  
171    /* Loop unrolling: Compute remaining outputs */
172    blkCnt = blockSize % 0x4U;
173  
174  #else
175  
176    /* Initialize blkCnt with number of samples */
177    blkCnt = blockSize;
178  
179  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
180  #endif /* #if defined(ARM_MATH_NEON) */
181  
182    while (blkCnt > 0U)
183    {
184      /* C = |A| */
185  
186      /* Calculate absolute and store result in destination buffer. */
187      *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
188  
189      /* Decrement loop counter */
190      blkCnt--;
191    }
192  
193  }
#endif /* defined(ARM_FLOAT16_SUPPORTED) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
196  /**
197    @} end of BasicAbs group
198   */