/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_max_f16.c
arm_max_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_max_f16.c
  4   * Description:  Maximum value of a floating-point vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions_f16.h"
 30  
 31  #if defined(ARM_FLOAT16_SUPPORTED)
 32  
 33  #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
 34  #include <limits.h>
 35  #endif
 36  
 37  /**
 38    @ingroup groupStats
 39   */
 40  
 41  
 42  /**
 43    @addtogroup Max
 44    @{
 45   */
 46  
 47  /**
 48    @brief         Maximum value of a floating-point vector.
 49    @param[in]     pSrc       points to the input vector
 50    @param[in]     blockSize  number of samples in input vector
 51    @param[out]    pResult    maximum value returned here
 52    @param[out]    pIndex     index of maximum value returned here
 53    @return        none
 54   */
 55  
 56  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 57  
 58  void arm_max_f16(
 59    const float16_t * pSrc,
 60    uint32_t blockSize,
 61    float16_t * pResult,
 62    uint32_t * pIndex)
 63  {
 64       int32_t blkCnt;
 65      f16x8_t vecSrc;
 66      f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);
 67      float16_t maxValue = F16_MIN;
 68      uint32_t idx = blockSize;
 69      uint16x8_t indexVec;
 70      uint16x8_t curExtremIdxVec;
 71      uint32_t curIdx = 0;
 72      mve_pred16_t p0;
 73      float16_t tmp;
 74  
 75  
 76      indexVec = vidupq_wb_u16(&curIdx, 1);
 77      curExtremIdxVec = vdupq_n_u16(0);
 78  
 79      /* Compute 4 outputs at a time */
 80      blkCnt = blockSize >> 3;
 81      while (blkCnt > 0)
 82      {
 83          vecSrc = vldrhq_f16(pSrc);
 84          /*
 85           * Get current max per lane and current index per lane
 86           * when a max is selected
 87           */
 88          p0 = vcmpgeq(vecSrc, curExtremValVec);
 89          curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
 90          curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
 91  
 92          indexVec = vidupq_wb_u16(&curIdx, 1);
 93  
 94          pSrc += 8;
 95          /* Decrement the loop counter */
 96          blkCnt--;
 97      }
 98  
 99  
100      /*
101       * Get max value across the vector
102       */
103      maxValue = vmaxnmvq(maxValue, curExtremValVec);
104      /*
105       * set index for lower values to max possible index
106       */
107      p0 = vcmpgeq(curExtremValVec, maxValue);
108      indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
109      /*
110       * Get min index which is thus for a max value
111       */
112      idx = vminvq(idx, indexVec);
113  
114      /* Tail */
115      blkCnt = blockSize & 7;
116  
117      while (blkCnt > 0)
118      {
119        /* Initialize tmp to the next consecutive values one by one */
120        tmp = *pSrc++;
121  
122        /* compare for the maximum value */
123        if (maxValue < tmp)
124        {
125          /* Update the maximum value and it's index */
126          maxValue = tmp;
127          idx = blockSize - blkCnt;
128        }
129  
130        /* Decrement loop counter */
131        blkCnt--;
132      }
133  
134      /*
135       * Save result
136       */
137      *pIndex = idx;
138      *pResult = maxValue;
139  }
140  
141  #else
142  void arm_max_f16(
143    const float16_t * pSrc,
144          uint32_t blockSize,
145          float16_t * pResult,
146          uint32_t * pIndex)
147  {
148          float16_t maxVal, out;                         /* Temporary variables to store the output value. */
149          uint32_t blkCnt, outIndex;                     /* Loop counter */
150  
151  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
152          uint32_t index;                                /* index of maximum value */
153  #endif
154  
155    /* Initialise index value to zero. */
156    outIndex = 0U;
157  
158    /* Load first input value that act as reference value for comparision */
159    out = *pSrc++;
160  
161  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
162    /* Initialise index of maximum value. */
163    index = 0U;
164  
165    /* Loop unrolling: Compute 4 outputs at a time */
166    blkCnt = (blockSize - 1U) >> 2U;
167  
168    while (blkCnt > 0U)
169    {
170      /* Initialize maxVal to next consecutive values one by one */
171      maxVal = *pSrc++;
172  
173      /* compare for the maximum value */
174      if (out < maxVal)
175      {
176        /* Update the maximum value and it's index */
177        out = maxVal;
178        outIndex = index + 1U;
179      }
180  
181      maxVal = *pSrc++;
182      if (out < maxVal)
183      {
184        out = maxVal;
185        outIndex = index + 2U;
186      }
187  
188      maxVal = *pSrc++;
189      if (out < maxVal)
190      {
191        out = maxVal;
192        outIndex = index + 3U;
193      }
194  
195      maxVal = *pSrc++;
196      if (out < maxVal)
197      {
198        out = maxVal;
199        outIndex = index + 4U;
200      }
201  
202      index += 4U;
203  
204      /* Decrement loop counter */
205      blkCnt--;
206    }
207  
208    /* Loop unrolling: Compute remaining outputs */
209    blkCnt = (blockSize - 1U) % 4U;
210  
211  #else
212  
213    /* Initialize blkCnt with number of samples */
214    blkCnt = (blockSize - 1U);
215  
216  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
217  
218    while (blkCnt > 0U)
219    {
220      /* Initialize maxVal to the next consecutive values one by one */
221      maxVal = *pSrc++;
222  
223      /* compare for the maximum value */
224      if (out < maxVal)
225      {
226        /* Update the maximum value and it's index */
227        out = maxVal;
228        outIndex = blockSize - blkCnt;
229      }
230  
231      /* Decrement loop counter */
232      blkCnt--;
233    }
234  
235    /* Store the maximum value and it's index into destination pointers */
236    *pResult = out;
237    *pIndex = outIndex;
238  }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
240  
241  /**
242    @} end of Max group
243   */
244  
245  #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
246