/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_max_q7.c
arm_max_q7.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_max_q7.c
  4   * Description:  Maximum value of a Q7 vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  
 31  /**
 32    @ingroup groupStats
 33   */
 34  
 35  /**
 36    @addtogroup Max
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Maximum value of a Q7 vector.
 42    @param[in]     pSrc       points to the input vector
 43    @param[in]     blockSize  number of samples in input vector
 44    @param[out]    pResult    maximum value returned here
 45    @param[out]    pIndex     index of maximum value returned here
 46    @return        none
 47   */
 48  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 49  
 50  #include "arm_helium_utils.h"
 51  
 52  static void arm_small_blk_max_q7(
 53      const q7_t * pSrc,
 54      uint16_t blockSize,
 55      q7_t * pResult,
 56      uint32_t * pIndex)
 57  {
 58      int32_t        blkCnt;     /* loop counters */
 59      q7x16_t        extremValVec = vdupq_n_s8(Q7_MIN);
 60      q7_t           maxValue = Q7_MIN;
 61      uint8x16_t     indexVec;
 62      uint8x16_t     extremIdxVec;
 63      mve_pred16_t   p0;
 64      uint8_t        extremIdxArr[16];
 65  
 66      indexVec = vidupq_u8(0U, 1);
 67  
 68      blkCnt = blockSize;
 69      do {
 70          mve_pred16_t    p = vctp8q(blkCnt);
 71          q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);
 72          /*
 73           * Get current max per lane and current index per lane
 74           * when a max is selected
 75           */
 76          p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
 77  
 78          extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
 79          /* store per-lane extrema indexes */
 80          vst1q_p_u8(extremIdxArr, indexVec, p0);
 81  
 82          indexVec += 16;
 83          pSrc += 16;
 84          blkCnt -= 16;
 85      }
 86      while (blkCnt > 0);
 87  
 88  
 89      /* Get max value across the vector   */
 90      maxValue = vmaxvq(maxValue, extremValVec);
 91  
 92      /* set index for lower values to max possible index   */
 93      p0 = vcmpgeq(extremValVec, maxValue);
 94      extremIdxVec = vld1q_u8(extremIdxArr);
 95  
 96      indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
 97      *pIndex = vminvq_u8(blockSize - 1, indexVec);
 98      *pResult = maxValue;
 99  }
100  
101  void arm_max_q7(
102    const q7_t * pSrc,
103          uint32_t blockSize,
104          q7_t * pResult,
105          uint32_t * pIndex)
106  {
107      int32_t   totalSize = blockSize;
108      const uint16_t sub_blk_sz = UINT8_MAX + 1;
109  
110      if (totalSize <= sub_blk_sz)
111      {
112          arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
113      }
114      else
115      {
116          uint32_t  curIdx = 0;
117          q7_t      curBlkExtr = Q7_MIN;
118          uint32_t  curBlkPos = 0;
119          uint32_t  curBlkIdx = 0;
120          /*
121           * process blocks of 255 elts
122           */
123          while (totalSize >= sub_blk_sz)
124          {
125              const q7_t     *curSrc = pSrc;
126  
127              arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
128              if (*pResult > curBlkExtr)
129              {
130                  /*
131                   * update partial extrema
132                   */
133                  curBlkExtr = *pResult;
134                  curBlkPos = *pIndex;
135                  curBlkIdx = curIdx;
136              }
137              curIdx++;
138              pSrc += sub_blk_sz;
139              totalSize -= sub_blk_sz;
140          }
141          /*
142           * remainder
143           */
144          arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex);
145          if (*pResult > curBlkExtr)
146          {
147              curBlkExtr = *pResult;
148              curBlkPos = *pIndex;
149              curBlkIdx = curIdx;
150          }
151          *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
152          *pResult = curBlkExtr;
153      }
154  }
155  #else
/*
 * Scalar implementation: linear scan that keeps the largest value seen and
 * the index of its first occurrence (later equal values do not replace it).
 *
 * For blockSize == 0 nothing is read from pSrc; *pResult is set to Q7_MIN
 * and *pIndex to 0.
 */
void arm_max_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
        q7_t maxVal, out;                              /* Candidate sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of running maximum */

#if defined (ARM_MATH_LOOPUNROLL)
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */
#endif

  /* Empty input: without this guard the first load would be out of bounds
     and (blockSize - 1U) would wrap around to 0xFFFFFFFF. */
  if (blockSize == 0U)
  {
    *pResult = Q7_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;
  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with the number of samples after the first one */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index; blkCnt samples remain,
         so the current sample sits at blockSize - blkCnt */
      out = maxVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
252  #endif /* defined(ARM_MATH_MVEI) */
253  
254  /**
255    @} end of Max group
256   */