/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_absmax_f16.c
arm_absmax_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_absmax_f16.c
  4   * Description:  Maximum of the absolute values of a half-precision floating-point vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions_f16.h"
 30  
 31  #if defined(ARM_FLOAT16_SUPPORTED)
 32  
 33  #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
 34  #include <limits.h>
 35  #endif
 36  
 37  /**
 38    @ingroup groupStats
 39   */
 40  
 41  
 42  /**
 43    @addtogroup AbsMax
 44    @{
 45   */
 46  
 47  /**
 48    @brief         Maximum value of absolute values of a floating-point vector.
 49    @param[in]     pSrc       points to the input vector
 50    @param[in]     blockSize  number of samples in input vector
 51    @param[out]    pResult    maximum value returned here
 52    @param[out]    pIndex     index of maximum value returned here
 53    @return        none
 54   */
 55  
 56  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 57  
 58  #include "arm_helium_utils.h"
 59  void arm_absmax_f16(
 60    const float16_t * pSrc,
 61          uint32_t blockSize,
 62          float16_t * pResult,
 63          uint32_t * pIndex)
 64  {
 65        uint16_t        blkCnt;           /* loop counters */
 66      f16x8_t       vecSrc;
 67      float16_t const *pSrcVec;
 68      f16x8_t       curExtremValVec = vdupq_n_f16(F16_ABSMIN);
 69      float16_t       maxValue = F16_ABSMIN;
 70      uint16_t        idx = blockSize;
 71      uint16x8_t    indexVec;
 72      uint16x8_t    curExtremIdxVec;
 73      mve_pred16_t    p0;
 74  
 75  
 76      indexVec = vidupq_u16((uint32_t)0, 1);
 77      curExtremIdxVec = vdupq_n_u16(0);
 78  
 79      pSrcVec = (float16_t const *) pSrc;
 80      blkCnt = blockSize >> 3;
 81      while (blkCnt > 0U)
 82      {
 83          vecSrc = vldrhq_f16(pSrcVec);  
 84          pSrcVec += 8;
 85          vecSrc = vabsq(vecSrc);
 86          /*
 87           * Get current max per lane and current index per lane
 88           * when a max is selected
 89           */
 90          p0 = vcmpgeq(vecSrc, curExtremValVec);
 91          curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
 92          curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
 93  
 94          indexVec = indexVec +  8;
 95          /*
 96           * Decrement the blockSize loop counter
 97           */
 98          blkCnt--;
 99      }
100      /*
101       * tail
102       * (will be merged thru tail predication)
103       */
104      blkCnt = blockSize & 7;
105      if (blkCnt > 0U)
106      {
107          vecSrc = vldrhq_f16(pSrcVec);  
108          pSrcVec += 8;
109          vecSrc = vabsq(vecSrc);
110  
111          p0 = vctp16q(blkCnt);
112          /*
113           * Get current max per lane and current index per lane
114           * when a max is selected
115           */
116          p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
117          curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
118          curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
119      }
120      /*
121       * Get max value across the vector
122       */
123      maxValue = vmaxnmvq(maxValue, curExtremValVec);
124      /*
125       * set index for lower values to max possible index
126       */
127      p0 = vcmpgeq(curExtremValVec, maxValue);
128      indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
129      /*
130       * Get min index which is thus for a max value
131       */
132      idx = vminvq(idx, indexVec);
133      /*
134       * Save result
135       */
136      *pIndex = idx;
137      *pResult = maxValue;
138  }
139  #else
140  #if defined(ARM_MATH_LOOPUNROLL)
141  void arm_absmax_f16(
142    const float16_t * pSrc,
143          uint32_t blockSize,
144          float16_t * pResult,
145          uint32_t * pIndex)
146  {
147          float16_t cur_absmax, out;                     /* Temporary variables to store the output value. */\
148          uint32_t blkCnt, outIndex;                     /* Loop counter */                                   \
149          uint32_t index;                                /* index of maximum value */                         \
150                                                                                                              \
151    /* Initialize index value to zero. */                                                                     \
152    outIndex = 0U;                                                                                            \
153    /* Load first input value that act as reference value for comparision */                                  \
154    out = *pSrc++;                                                                                            \
155    out = (out > 0.0f16) ? out : -out;                                                                             \
156    /* Initialize index of extrema value. */                                                                  \
157    index = 0U;                                                                                               \
158                                                                                                              \
159    /* Loop unrolling: Compute 4 outputs at a time */                                                         \
160    blkCnt = (blockSize - 1U) >> 2U;                                                                          \
161                                                                                                              \
162    while (blkCnt > 0U)                                                                                       \
163    {                                                                                                         \
164      /* Initialize cur_absmax to next consecutive values one by one */                                         \
165      cur_absmax = *pSrc++;                                                                                     \
166      cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
167      /* compare for the extrema value */                                                                     \
168      if (cur_absmax > out)                                                                         \
169      {                                                                                                       \
170        /* Update the extrema value and it's index */                                                         \
171        out = cur_absmax;                                                                                       \
172        outIndex = index + 1U;                                                                                \
173      }                                                                                                       \
174                                                                                                              \
175      cur_absmax = *pSrc++;                                                                                     \
176      cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
177      if (cur_absmax > out)                                                                         \
178      {                                                                                                       \
179        out = cur_absmax;                                                                                       \
180        outIndex = index + 2U;                                                                                \
181      }                                                                                                       \
182                                                                                                              \
183      cur_absmax = *pSrc++;                                                                                     \
184      cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
185      if (cur_absmax > out)                                                                          \
186      {                                                                                                       \
187        out = cur_absmax;                                                                                       \
188        outIndex = index + 3U;                                                                                \
189      }                                                                                                       \
190                                                                                                              \
191      cur_absmax = *pSrc++;                                                                                     \
192      cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
193      if (cur_absmax > out)                                                                          \
194      {                                                                                                       \
195        out = cur_absmax;                                                                                       \
196        outIndex = index + 4U;                                                                                \
197      }                                                                                                       \
198                                                                                                              \
199      index += 4U;                                                                                            \
200                                                                                                              \
201      /* Decrement loop counter */                                                                            \
202      blkCnt--;                                                                                               \
203    }                                                                                                         \
204                                                                                                              \
205    /* Loop unrolling: Compute remaining outputs */                                                           \
206    blkCnt = (blockSize - 1U) % 4U;                                                                           \
207                                                                                                              \
208                                                                                                              \
209    while (blkCnt > 0U)                                                                                       \
210    {                                                                                                         \
211      cur_absmax = *pSrc++;                                                                                     \
212      cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;                                                                 \
213      if (cur_absmax > out)                                                                         \
214      {                                                                                                       \
215        out = cur_absmax;                                                                                       \
216        outIndex = blockSize - blkCnt;                                                                        \
217      }                                                                                                       \
218                                                                                                              \
219      /* Decrement loop counter */                                                                            \
220      blkCnt--;                                                                                               \
221    }                                                                                                         \
222                                                                                                              \
223    /* Store the extrema value and it's index into destination pointers */                                    \
224    *pResult = out;                                                                                           \
225    *pIndex = outIndex;  
226  }
227  #else
228  void arm_absmax_f16(
229    const float16_t * pSrc,
230          uint32_t blockSize,
231          float16_t * pResult,
232          uint32_t * pIndex)
233  {
234          float16_t maxVal, out;                         /* Temporary variables to store the output value. */
235          uint32_t blkCnt, outIndex;                     /* Loop counter */
236  
237    /* Initialise index value to zero. */
238    outIndex = 0U;
239  
240    /* Load first input value that act as reference value for comparision */
241    out = fabsf(*pSrc++);
242  
243    /* Initialize blkCnt with number of samples */
244    blkCnt = (blockSize - 1U);
245  
246    while (blkCnt > 0U)
247    {
248      /* Initialize maxVal to the next consecutive values one by one */
249      maxVal = fabsf(*pSrc++);
250  
251      /* compare for the maximum value */
252      if (out < maxVal)
253      {
254        /* Update the maximum value and it's index */
255        out = maxVal;
256        outIndex = blockSize - blkCnt;
257      }
258  
259      /* Decrement loop counter */
260      blkCnt--;
261    }
262  
263    /* Store the maximum value and it's index into destination pointers */
264    *pResult = out;
265    *pIndex = outIndex;
266  }
267  #endif /* defined(ARM_MATH_LOOPUNROLL) */
268  #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
269  /**
270    @} end of AbsMax group
271   */
272  
273  #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
274