/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_max_f32.c
arm_max_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_max_f32.c
  4   * Description:  Maximum value of a floating-point vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
 31  #include <limits.h>
 32  #endif
 33  
 34  /**
 35    @ingroup groupStats
 36   */
 37  
 38  /**
 39    @defgroup Max Maximum
 40  
 41    Computes the maximum value of an array of data.
 42    The function returns both the maximum value and its position within the array.
 43    There are separate functions for floating-point, Q31, Q15, and Q7 data types.
 44   */
 45  
 46  /**
 47    @addtogroup Max
 48    @{
 49   */
 50  
 51  /**
 52    @brief         Maximum value of a floating-point vector.
 53    @param[in]     pSrc       points to the input vector
 54    @param[in]     blockSize  number of samples in input vector
 55    @param[out]    pResult    maximum value returned here
 56    @param[out]    pIndex     index of maximum value returned here
 57    @return        none
 58   */
 59  
 60  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 61  void arm_max_f32(
 62    const float32_t * pSrc,
 63    uint32_t blockSize,
 64    float32_t * pResult,
 65    uint32_t * pIndex)
 66  {
 67      uint32_t blkCnt; 
 68      f32x4_t vecSrc;
 69      f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
 70      float32_t maxValue = F32_MIN;
 71      uint32_t idx = blockSize;
 72      uint32x4_t indexVec;
 73      uint32x4_t curExtremIdxVec;
 74      uint32_t curIdx = 0;
 75      mve_pred16_t p0;
 76      float32_t tmp;
 77  
 78  
 79      indexVec = vidupq_wb_u32(&curIdx, 1);
 80      curExtremIdxVec = vdupq_n_u32(0);
 81  
 82      /* Compute 4 outputs at a time */
 83      blkCnt = blockSize >> 2U;
 84      while (blkCnt > 0U)
 85      {
 86          vecSrc = vldrwq_f32(pSrc);
 87          /*
 88           * Get current max per lane and current index per lane
 89           * when a max is selected
 90           */
 91          p0 = vcmpgeq(vecSrc, curExtremValVec);
 92          curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
 93          curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
 94  
 95          indexVec = vidupq_wb_u32(&curIdx, 1);
 96  
 97          pSrc += 4;
 98          /* Decrement the loop counter */
 99          blkCnt--;
100      }
101  
102  
103      /*
104       * Get max value across the vector
105       */
106      maxValue = vmaxnmvq(maxValue, curExtremValVec);
107      /*
108       * set index for lower values to max possible index
109       */
110      p0 = vcmpgeq(curExtremValVec, maxValue);
111      indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
112      /*
113       * Get min index which is thus for a max value
114       */
115      idx = vminvq(idx, indexVec);
116  
117      /* Tail */
118      blkCnt = blockSize & 0x3;
119  
120      while (blkCnt > 0U)
121      {
122        /* Initialize tmp to the next consecutive values one by one */
123        tmp = *pSrc++;
124  
125        /* compare for the maximum value */
126        if (maxValue < tmp)
127        {
128          /* Update the maximum value and it's index */
129          maxValue = tmp;
130          idx = blockSize - blkCnt;
131        }
132  
133        /* Decrement loop counter */
134        blkCnt--;
135      }
136  
137      /*
138       * Save result
139       */
140      *pIndex = idx;
141      *pResult = maxValue;
142  }
143  
144  #else
145  #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
146  void arm_max_f32(
147    const float32_t * pSrc,
148    uint32_t blockSize,
149    float32_t * pResult,
150    uint32_t * pIndex)
151  {
152    float32_t maxVal1, out;               /* Temporary variables to store the output value. */
153    uint32_t blkCnt, outIndex;              /* loop counter */
154  
155    float32x4_t outV, srcV;
156    float32x2_t outV2;
157  
158    uint32x4_t idxV;
159    uint32x4_t maxIdx;
160    static const uint32_t indexInit[4]={4,5,6,7};
161    static const uint32_t countVInit[4]={0,1,2,3};
162  
163    uint32x4_t index;
164    uint32x4_t delta;
165    uint32x4_t countV;
166    uint32x2_t countV2;
167  
168    maxIdx = vdupq_n_u32(ULONG_MAX);
169    delta = vdupq_n_u32(4);
170    index = vld1q_u32(indexInit);
171    countV = vld1q_u32(countVInit);
172  
173  
174    /* Initialise the index value to zero. */
175    outIndex = 0U;
176  
177    /* Load first input value that act as reference value for comparison */
178    if (blockSize <= 3)
179    {
180        out = *pSrc++;
181  
182        blkCnt = blockSize - 1;
183  
184        while (blkCnt > 0U)
185        {
186          /* Initialize maxVal to the next consecutive values one by one */
187          maxVal1 = *pSrc++;
188      
189          /* compare for the maximum value */
190          if (out < maxVal1)
191          {
192            /* Update the maximum value and it's index */
193            out = maxVal1;
194            outIndex = blockSize - blkCnt;
195          }
196      
197          /* Decrement the loop counter */
198          blkCnt--;
199        }
200    }
201    else
202    {
203        outV = vld1q_f32(pSrc);
204        pSrc += 4;
205   
206        /* Compute 4 outputs at a time */
207        blkCnt = (blockSize - 4 ) >> 2U;
208      
209        while (blkCnt > 0U)
210        {
211          srcV = vld1q_f32(pSrc);
212          pSrc += 4;
213      
214          idxV = vcgtq_f32(srcV, outV);
215          outV = vbslq_f32(idxV, srcV, outV );
216          countV = vbslq_u32(idxV, index,countV );
217      
218          index = vaddq_u32(index,delta);
219      
220          /* Decrement the loop counter */
221          blkCnt--;
222        }
223      
224        outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
225        outV2 = vpmax_f32(outV2,outV2);
226        out = vget_lane_f32(outV2, 0);
227      
228        idxV = vceqq_f32(outV, vdupq_n_f32(out));
229        countV = vbslq_u32(idxV, countV,maxIdx);
230        
231        countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
232        countV2 = vpmin_u32(countV2,countV2);
233        outIndex = vget_lane_u32(countV2,0); 
234      
235        /* if (blockSize - 1U) is not multiple of 4 */
236        blkCnt = (blockSize - 4 ) % 4U;
237      
238        while (blkCnt > 0U)
239        {
240          /* Initialize maxVal to the next consecutive values one by one */
241          maxVal1 = *pSrc++;
242      
243          /* compare for the maximum value */
244          if (out < maxVal1)
245          {
246            /* Update the maximum value and it's index */
247            out = maxVal1;
248            outIndex = blockSize - blkCnt ;
249          }
250      
251          /* Decrement the loop counter */
252          blkCnt--;
253        }
254      
255        
256    }
257  
258    /* Store the maximum value and it's index into destination pointers */
259    *pResult = out;
260    *pIndex = outIndex;
261  }
262  #else
/*
 * Portable implementation of arm_max_f32.
 *
 * The first sample is taken as the initial maximum and every following
 * sample is compared against it. When ARM_MATH_LOOPUNROLL is defined (and
 * ARM_MATH_AUTOVECTORIZE is not) the comparison loop is unrolled by four.
 *
 * pSrc       points to the input vector
 * blockSize  number of samples in the input vector
 * pResult    maximum value returned here
 * pIndex     index of the maximum value returned here; the comparison is
 *            strict, so the first occurrence of the maximum is reported
 *
 * With blockSize == 0 no sample is read and the outputs are F32_MIN and 0,
 * the same result the MVE variant in this file produces for an empty vector.
 */
void arm_max_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        float32_t maxVal, out;                         /* Current sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the maximum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample of the previous group */
#endif

  /* Nothing to scan: reading *pSrc would be out of bounds and the loop
     counter (blockSize - 1U) would wrap around to 0xFFFFFFFF. */
  if (blockSize == 0U)
  {
    *pResult = F32_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* Compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with the number of samples left after the first one */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* Compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
360  #endif /* #if defined(ARM_MATH_NEON) */
361  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
362  
363  /**
364    @} end of Max group
365   */