/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_absmax_q7.c
arm_absmax_q7.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_absmax_q7.c
  4   * Description:  Maximum value of absolute values of a Q7 vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  
 31  /**
 32    @ingroup groupStats
 33   */
 34  
 35  /**
 36    @addtogroup AbsMax
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Maximum value of absolute values of a Q7 vector.
 42    @param[in]     pSrc       points to the input vector
 43    @param[in]     blockSize  number of samples in input vector
 44    @param[out]    pResult    maximum value returned here
 45    @param[out]    pIndex     index of maximum value returned here
 46    @return        none
 47   */
 48  
 49  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 50  
 51  #include <stdint.h>
 52  #include "arm_helium_utils.h"
 53  
 54  #define MAX_BLKSZ_S8  (UINT8_MAX+1)
 55  
 56  static void arm_small_blk_absmax_q7(
 57      const q7_t * pSrc,
 58      uint16_t blockSize,
 59      q7_t * pResult,
 60      uint32_t * pIndex)
 61  {
 62      int32_t        blkCnt;     /* loop counters */
 63      q7x16_t        extremValVec = vdupq_n_s8(Q7_ABSMIN);
 64      q7_t           maxValue = Q7_ABSMIN;
 65      uint8x16_t     indexVec;
 66      uint8x16_t     extremIdxVec;
 67      mve_pred16_t   p0;
 68      uint8_t        extremIdxArr[16];
 69  
 70      indexVec = vidupq_u8(0U, 1);
 71  
 72      blkCnt = blockSize;
 73      do {
 74          mve_pred16_t    p = vctp8q(blkCnt);
 75          q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);
 76  
 77          extremIdxVal = vabsq(extremIdxVal);
 78          /*
 79           * Get current max per lane and current index per lane
 80           * when a max is selected
 81           */
 82          p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
 83  
 84          extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
 85          /* store per-lane extrema indexes */
 86          vst1q_p_u8(extremIdxArr, indexVec, p0);
 87  
 88          indexVec += 16;
 89          pSrc += 16;
 90          blkCnt -= 16;
 91      }
 92      while (blkCnt > 0);
 93  
 94  
 95      /* Get max value across the vector   */
 96      maxValue = vmaxvq(maxValue, extremValVec);
 97  
 98      /* set index for lower values to max possible index   */
 99      p0 = vcmpgeq(extremValVec, maxValue);
100      extremIdxVec = vld1q_u8(extremIdxArr);
101  
102      indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
103      *pIndex = vminvq_u8(blockSize - 1, indexVec);
104      *pResult = maxValue;
105  }
106  
107  void arm_absmax_q7(
108    const q7_t * pSrc,
109          uint32_t blockSize,
110          q7_t * pResult,
111          uint32_t * pIndex)
112  {
113      int32_t   totalSize = blockSize;
114  
115      if (totalSize <= MAX_BLKSZ_S8)
116      {
117          arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
118      }
119      else
120      {
121          uint32_t  curIdx = 0;
122          q7_t      curBlkExtr = Q7_MIN;
123          uint32_t  curBlkPos = 0;
124          uint32_t  curBlkIdx = 0;
125          /*
126           * process blocks of 255 elts
127           */
128          while (totalSize >= MAX_BLKSZ_S8)
129          {
130              const q7_t     *curSrc = pSrc;
131  
132              arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
133              if (*pResult > curBlkExtr)
134              {
135                  /*
136                   * update partial extrema
137                   */
138                  curBlkExtr = *pResult;
139                  curBlkPos = *pIndex;
140                  curBlkIdx = curIdx;
141              }
142              curIdx++;
143              pSrc += MAX_BLKSZ_S8;
144              totalSize -= MAX_BLKSZ_S8;
145          }
146          /*
147           * remainder
148           */
149          arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
150          if (*pResult > curBlkExtr)
151          {
152              curBlkExtr = *pResult;
153              curBlkPos = *pIndex;
154              curBlkIdx = curIdx;
155          }
156          *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
157          *pResult = curBlkExtr;
158      }
159  }
160  #else
161  #if defined(ARM_MATH_DSP)
162  void arm_absmax_q7(
163    const q7_t * pSrc,
164          uint32_t blockSize,
165          q7_t * pResult,
166          uint32_t * pIndex)
167  {
168          q7_t cur_absmax, out;                     /* Temporary variables to store the output value. */\
169          uint32_t blkCnt, outIndex;                     /* Loop counter */                                   \
170          uint32_t index;                                /* index of maximum value */                         \
171                                                                                                              \
172    /* Initialize index value to zero. */                                                                     \
173    outIndex = 0U;                                                                                            \
174    /* Load first input value that act as reference value for comparision */                                  \
175    out = *pSrc++;                                                                                            \
176    out = (out > 0) ? out : (q7_t)__QSUB8(0, out);                                                                           \
177    /* Initialize index of extrema value. */                                                                  \
178    index = 0U;                                                                                               \
179                                                                                                              \
180    /* Loop unrolling: Compute 4 outputs at a time */                                                         \
181    blkCnt = (blockSize - 1U) >> 2U;                                                                          \
182                                                                                                              \
183    while (blkCnt > 0U)                                                                                       \
184    {                                                                                                         \
185      /* Initialize cur_absmax to next consecutive values one by one */                                         \
186      cur_absmax = *pSrc++;                                                                                     \
187      cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
188      /* compare for the extrema value */                                                                     \
189      if (cur_absmax > out)                                                                         \
190      {                                                                                                       \
191        /* Update the extrema value and it's index */                                                         \
192        out = cur_absmax;                                                                                       \
193        outIndex = index + 1U;                                                                                \
194      }                                                                                                       \
195                                                                                                              \
196      cur_absmax = *pSrc++;                                                                                     \
197      cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
198      if (cur_absmax > out)                                                                         \
199      {                                                                                                       \
200        out = cur_absmax;                                                                                       \
201        outIndex = index + 2U;                                                                                \
202      }                                                                                                       \
203                                                                                                              \
204      cur_absmax = *pSrc++;                                                                                     \
205      cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
206      if (cur_absmax > out)                                                                          \
207      {                                                                                                       \
208        out = cur_absmax;                                                                                       \
209        outIndex = index + 3U;                                                                                \
210      }                                                                                                       \
211                                                                                                              \
212      cur_absmax = *pSrc++;                                                                                     \
213      cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
214      if (cur_absmax > out)                                                                          \
215      {                                                                                                       \
216        out = cur_absmax;                                                                                       \
217        outIndex = index + 4U;                                                                                \
218      }                                                                                                       \
219                                                                                                              \
220      index += 4U;                                                                                            \
221                                                                                                              \
222      /* Decrement loop counter */                                                                            \
223      blkCnt--;                                                                                               \
224    }                                                                                                         \
225                                                                                                              \
226    /* Loop unrolling: Compute remaining outputs */                                                           \
227    blkCnt = (blockSize - 1U) % 4U;                                                                           \
228                                                                                                              \
229                                                                                                              \
230    while (blkCnt > 0U)                                                                                       \
231    {                                                                                                         \
232      cur_absmax = *pSrc++;                                                                                     \
233      cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
234      if (cur_absmax > out)                                                                         \
235      {                                                                                                       \
236        out = cur_absmax;                                                                                       \
237        outIndex = blockSize - blkCnt;                                                                        \
238      }                                                                                                       \
239                                                                                                              \
240      /* Decrement loop counter */                                                                            \
241      blkCnt--;                                                                                               \
242    }                                                                                                         \
243                                                                                                              \
244    /* Store the extrema value and it's index into destination pointers */                                    \
245    *pResult = out;                                                                                           \
246    *pIndex = outIndex;  
247  }
248  #else
249  void arm_absmax_q7(
250    const q7_t * pSrc,
251          uint32_t blockSize,
252          q7_t * pResult,
253          uint32_t * pIndex)
254  {
255         q7_t maxVal, out;                              /* Temporary variables to store the output value. */
256          uint32_t blkCnt, outIndex;                     /* Loop counter */
257  
258  
259    /* Initialise index value to zero. */
260    outIndex = 0U;
261    /* Load first input value that act as reference value for comparision */
262    out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
263    pSrc++;
264  
265    /* Initialize blkCnt with number of samples */
266    blkCnt = (blockSize - 1U);
267  
268    while (blkCnt > 0U)
269    {
270      /* Initialize maxVal to the next consecutive values one by one */
271      maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
272      pSrc++;
273  
274      /* compare for the maximum value */
275      if (out < maxVal)
276      {
277        /* Update the maximum value and it's index */
278        out = maxVal;
279        outIndex = blockSize - blkCnt;
280      }
281  
282      /* Decrement loop counter */
283      blkCnt--;
284    }
285  
286    /* Store the maximum value and it's index into destination pointers */
287    *pResult = out;
288    *pIndex = outIndex;
289  }
290  #endif /* defined(ARM_MATH_DSP) */
291  #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
292  /**
293    @} end of AbsMax group
294   */