/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_abs_f32.c
arm_abs_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_abs_f32.c
  4   * Description:  Floating-point vector absolute value
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions.h"
 30  #include <math.h>
 31  
 32  /**
 33    @ingroup groupMath
 34   */
 35  
 36  /**
 37    @defgroup BasicAbs Vector Absolute Value
 38  
 39    Computes the absolute value of a vector on an element-by-element basis.
 40  
 41    <pre>
 42        pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
 43    </pre>
 44  
 45    The functions support in-place computation allowing the source and
 46    destination pointers to reference the same memory buffer.
 47    There are separate functions for floating-point, Q7, Q15, and Q31 data types.
 48   */
 49  
 50  /**
 51    @addtogroup BasicAbs
 52    @{
 53   */
 54  
 55  /**
 56    @brief         Floating-point vector absolute value.
 57    @param[in]     pSrc       points to the input vector
 58    @param[out]    pDst       points to the output vector
 59    @param[in]     blockSize  number of samples in each vector
 60    @return        none
 61   */
 62  
 63  
 64  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 65  
 66  #include "arm_helium_utils.h"
 67  
 68  void arm_abs_f32(
 69    const float32_t * pSrc,
 70          float32_t * pDst,
 71          uint32_t blockSize)
 72  {
 73      uint32_t blkCnt;                               /* Loop counter */
 74      f32x4_t vec1;
 75      f32x4_t res;
 76  
 77  
 78      /* Compute 4 outputs at a time */
 79      blkCnt = blockSize >> 2U;
 80  
 81      while (blkCnt > 0U)
 82      {
 83          /* C = |A| */
 84  
 85          /* Calculate absolute values and then store the results in the destination buffer. */
 86          vec1 = vld1q(pSrc);
 87          res = vabsq(vec1);
 88          vst1q(pDst, res);
 89  
 90          /* Increment pointers */
 91          pSrc += 4;
 92          pDst += 4;
 93          
 94          /* Decrement the loop counter */
 95          blkCnt--;
 96      }
 97  
 98      /* Tail */
 99      blkCnt = blockSize & 0x3;
100  
101  
102      if (blkCnt > 0U)
103      {
104        /* C = |A| */
105        mve_pred16_t p0 = vctp32q(blkCnt);
106        vec1 = vld1q(pSrc);
107        vstrwq_p(pDst, vabsq(vec1), p0);
108      }
109  
110  }
111  
112  #else
/*
 * Neon / loop-unrolled / plain scalar implementation of arm_abs_f32:
 *
 *     pDst[n] = |pSrc[n]|,   0 <= n < blockSize
 *
 * pSrc       points to the input vector
 * pDst       points to the output vector
 * blockSize  number of samples in each vector
 *
 * Depending on the build configuration, groups of four samples are first
 * processed either with Neon intrinsics or with a manually unrolled scalar
 * loop; whatever is left (or the whole vector when neither option is
 * enabled) is then handled one sample at a time.
 */
void arm_abs_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
  uint32_t remaining;                 /* Samples left for the scalar loop */

#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
  uint32_t quads;                     /* Number of 4-sample groups still to do */

  /* Neon: load four samples, take |x| lane-wise, store four results. */
  for (quads = blockSize >> 2U; quads > 0U; quads--)
  {
    vst1q_f32(pDst, vabsq_f32(vld1q_f32(pSrc)));

    pSrc += 4;
    pDst += 4;
  }

  /* Samples that did not fill a complete group of four */
  remaining = blockSize & 0x3;

#elif defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  uint32_t quads;                     /* Number of 4-sample groups still to do */

  /* Unrolled scalar loop: four outputs per iteration, in ascending order. */
  for (quads = blockSize >> 2U; quads > 0U; quads--)
  {
    pDst[0] = fabsf(pSrc[0]);
    pDst[1] = fabsf(pSrc[1]);
    pDst[2] = fabsf(pSrc[2]);
    pDst[3] = fabsf(pSrc[3]);

    pSrc += 4;
    pDst += 4;
  }

  /* Samples that did not fill a complete group of four */
  remaining = blockSize % 0x4U;

#else

  /* No block processing: every sample goes through the scalar loop. */
  remaining = blockSize;

#endif

  /* One sample per iteration: C = |A| */
  for (; remaining > 0U; remaining--)
  {
    *pDst = fabsf(*pSrc);

    pSrc++;
    pDst++;
  }

}
193  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
194  /**
195    @} end of BasicAbs group
196   */