arm_absmax_q7.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_absmax_q7.c 4 * Description: Maximum value of absolute values of a Q7 vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions.h" 30 31 /** 32 @ingroup groupStats 33 */ 34 35 /** 36 @addtogroup AbsMax 37 @{ 38 */ 39 40 /** 41 @brief Maximum value of absolute values of a Q7 vector. 42 @param[in] pSrc points to the input vector 43 @param[in] blockSize number of samples in input vector 44 @param[out] pResult maximum value returned here 45 @param[out] pIndex index of maximum value returned here 46 @return none 47 */ 48 49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 50 51 #include <stdint.h> 52 #include "arm_helium_utils.h" 53 54 #define MAX_BLKSZ_S8 (UINT8_MAX+1) 55 56 static void arm_small_blk_absmax_q7( 57 const q7_t * pSrc, 58 uint16_t blockSize, 59 q7_t * pResult, 60 uint32_t * pIndex) 61 { 62 int32_t blkCnt; /* loop counters */ 63 q7x16_t extremValVec = vdupq_n_s8(Q7_ABSMIN); 64 q7_t maxValue = Q7_ABSMIN; 65 uint8x16_t indexVec; 66 uint8x16_t extremIdxVec; 67 mve_pred16_t p0; 68 uint8_t extremIdxArr[16]; 69 70 indexVec = vidupq_u8(0U, 1); 71 72 blkCnt = blockSize; 73 do { 74 mve_pred16_t p = vctp8q(blkCnt); 75 q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p); 76 77 extremIdxVal = vabsq(extremIdxVal); 78 /* 79 * Get current max per lane and current index per lane 80 * when a max is selected 81 */ 82 p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); 83 84 extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); 85 /* store per-lane extrema indexes */ 86 vst1q_p_u8(extremIdxArr, indexVec, p0); 87 88 indexVec += 16; 89 pSrc += 16; 90 blkCnt -= 16; 91 } 92 while (blkCnt > 0); 93 94 95 /* Get max value across the vector */ 96 maxValue = vmaxvq(maxValue, extremValVec); 97 98 /* set index for lower values to max possible index */ 99 p0 = vcmpgeq(extremValVec, maxValue); 100 extremIdxVec = vld1q_u8(extremIdxArr); 101 102 indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0); 103 *pIndex = vminvq_u8(blockSize - 1, indexVec); 104 *pResult = maxValue; 105 } 106 107 void arm_absmax_q7( 108 const q7_t * pSrc, 109 uint32_t blockSize, 110 q7_t * pResult, 111 uint32_t * pIndex) 112 { 113 int32_t totalSize = blockSize; 114 115 if (totalSize <= MAX_BLKSZ_S8) 116 { 117 arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex); 118 } 119 else 120 { 121 uint32_t curIdx = 0; 122 q7_t curBlkExtr = Q7_MIN; 123 uint32_t curBlkPos = 0; 124 uint32_t curBlkIdx = 0; 125 /* 126 * process blocks of 255 elts 127 */ 128 while (totalSize >= MAX_BLKSZ_S8) 129 { 130 const q7_t *curSrc = pSrc; 131 132 arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex); 133 if (*pResult > curBlkExtr) 134 { 135 /* 136 * update partial extrema 137 */ 138 curBlkExtr = *pResult; 139 curBlkPos = *pIndex; 140 curBlkIdx = curIdx; 141 } 142 curIdx++; 143 pSrc += MAX_BLKSZ_S8; 144 totalSize -= MAX_BLKSZ_S8; 145 } 146 /* 147 * remainder 148 */ 149 arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex); 150 if (*pResult > curBlkExtr) 151 { 152 curBlkExtr = *pResult; 153 curBlkPos = *pIndex; 154 curBlkIdx = curIdx; 155 } 156 *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos; 157 *pResult = curBlkExtr; 158 } 159 } 160 #else 161 #if defined(ARM_MATH_DSP) 162 void arm_absmax_q7( 163 const q7_t * pSrc, 164 uint32_t blockSize, 165 q7_t * pResult, 166 uint32_t * pIndex) 167 { 168 q7_t cur_absmax, out; /* Temporary variables to store the output value. */\ 169 uint32_t blkCnt, outIndex; /* Loop counter */ \ 170 uint32_t index; /* index of maximum value */ \ 171 \ 172 /* Initialize index value to zero. */ \ 173 outIndex = 0U; \ 174 /* Load first input value that act as reference value for comparision */ \ 175 out = *pSrc++; \ 176 out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ 177 /* Initialize index of extrema value. */ \ 178 index = 0U; \ 179 \ 180 /* Loop unrolling: Compute 4 outputs at a time */ \ 181 blkCnt = (blockSize - 1U) >> 2U; \ 182 \ 183 while (blkCnt > 0U) \ 184 { \ 185 /* Initialize cur_absmax to next consecutive values one by one */ \ 186 cur_absmax = *pSrc++; \ 187 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ 188 /* compare for the extrema value */ \ 189 if (cur_absmax > out) \ 190 { \ 191 /* Update the extrema value and it's index */ \ 192 out = cur_absmax; \ 193 outIndex = index + 1U; \ 194 } \ 195 \ 196 cur_absmax = *pSrc++; \ 197 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ 198 if (cur_absmax > out) \ 199 { \ 200 out = cur_absmax; \ 201 outIndex = index + 2U; \ 202 } \ 203 \ 204 cur_absmax = *pSrc++; \ 205 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ 206 if (cur_absmax > out) \ 207 { \ 208 out = cur_absmax; \ 209 outIndex = index + 3U; \ 210 } \ 211 \ 212 cur_absmax = *pSrc++; \ 213 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ 214 if (cur_absmax > out) \ 215 { \ 216 out = cur_absmax; \ 217 outIndex = index + 4U; \ 218 } \ 219 \ 220 index += 4U; \ 221 \ 222 /* Decrement loop counter */ \ 223 blkCnt--; \ 224 } \ 225 \ 226 /* Loop unrolling: Compute remaining outputs */ \ 227 blkCnt = (blockSize - 1U) % 4U; \ 228 \ 229 \ 230 while (blkCnt > 0U) \ 231 { \ 232 cur_absmax = *pSrc++; \ 233 cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ 234 if (cur_absmax > out) \ 235 { \ 236 out = cur_absmax; \ 237 outIndex = blockSize - blkCnt; \ 238 } \ 239 \ 240 /* Decrement loop counter */ \ 241 blkCnt--; \ 242 } \ 243 \ 244 /* Store the extrema value and it's index into destination pointers */ \ 245 *pResult = out; \ 246 *pIndex = outIndex; 247 } 248 #else 249 void arm_absmax_q7( 250 const q7_t * pSrc, 251 uint32_t blockSize, 252 q7_t * pResult, 253 uint32_t * pIndex) 254 { 255 q7_t maxVal, out; /* Temporary variables to store the output value. */ 256 uint32_t blkCnt, outIndex; /* Loop counter */ 257 258 259 /* Initialise index value to zero. */ 260 outIndex = 0U; 261 /* Load first input value that act as reference value for comparision */ 262 out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); 263 pSrc++; 264 265 /* Initialize blkCnt with number of samples */ 266 blkCnt = (blockSize - 1U); 267 268 while (blkCnt > 0U) 269 { 270 /* Initialize maxVal to the next consecutive values one by one */ 271 maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); 272 pSrc++; 273 274 /* compare for the maximum value */ 275 if (out < maxVal) 276 { 277 /* Update the maximum value and it's index */ 278 out = maxVal; 279 outIndex = blockSize - blkCnt; 280 } 281 282 /* Decrement loop counter */ 283 blkCnt--; 284 } 285 286 /* Store the maximum value and it's index into destination pointers */ 287 *pResult = out; 288 *pIndex = outIndex; 289 } 290 #endif /* defined(ARM_MATH_DSP) */ 291 #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ 292 /** 293 @} end of AbsMax group 294 */