arm_max_q7.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_max_q7.c 4 * Description: Maximum value of a Q7 vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions.h" 30 31 /** 32 @ingroup groupStats 33 */ 34 35 /** 36 @addtogroup Max 37 @{ 38 */ 39 40 /** 41 @brief Maximum value of a Q7 vector. 42 @param[in] pSrc points to the input vector 43 @param[in] blockSize number of samples in input vector 44 @param[out] pResult maximum value returned here 45 @param[out] pIndex index of maximum value returned here 46 @return none 47 */ 48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 49 50 #include "arm_helium_utils.h" 51 52 static void arm_small_blk_max_q7( 53 const q7_t * pSrc, 54 uint16_t blockSize, 55 q7_t * pResult, 56 uint32_t * pIndex) 57 { 58 int32_t blkCnt; /* loop counters */ 59 q7x16_t extremValVec = vdupq_n_s8(Q7_MIN); 60 q7_t maxValue = Q7_MIN; 61 uint8x16_t indexVec; 62 uint8x16_t extremIdxVec; 63 mve_pred16_t p0; 64 uint8_t extremIdxArr[16]; 65 66 indexVec = vidupq_u8(0U, 1); 67 68 blkCnt = blockSize; 69 do { 70 mve_pred16_t p = vctp8q(blkCnt); 71 q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p); 72 /* 73 * Get current max per lane and current index per lane 74 * when a max is selected 75 */ 76 p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); 77 78 extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); 79 /* store per-lane extrema indexes */ 80 vst1q_p_u8(extremIdxArr, indexVec, p0); 81 82 indexVec += 16; 83 pSrc += 16; 84 blkCnt -= 16; 85 } 86 while (blkCnt > 0); 87 88 89 /* Get max value across the vector */ 90 maxValue = vmaxvq(maxValue, extremValVec); 91 92 /* set index for lower values to max possible index */ 93 p0 = vcmpgeq(extremValVec, maxValue); 94 extremIdxVec = vld1q_u8(extremIdxArr); 95 96 indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0); 97 *pIndex = vminvq_u8(blockSize - 1, indexVec); 98 *pResult = maxValue; 99 } 100 101 void arm_max_q7( 102 const q7_t * pSrc, 103 uint32_t blockSize, 104 q7_t * pResult, 105 uint32_t * pIndex) 106 { 107 int32_t totalSize = blockSize; 108 const uint16_t sub_blk_sz = UINT8_MAX + 1; 109 110 if (totalSize <= sub_blk_sz) 111 { 112 arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex); 113 } 114 else 115 { 116 uint32_t curIdx = 0; 117 q7_t curBlkExtr = Q7_MIN; 118 uint32_t curBlkPos = 0; 119 uint32_t curBlkIdx = 0; 120 /* 121 * process blocks of 255 elts 122 */ 123 while (totalSize >= sub_blk_sz) 124 { 125 const q7_t *curSrc = pSrc; 126 127 arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex); 128 if (*pResult > curBlkExtr) 129 { 130 /* 131 * update partial extrema 132 */ 133 curBlkExtr = *pResult; 134 curBlkPos = *pIndex; 135 curBlkIdx = curIdx; 136 } 137 curIdx++; 138 pSrc += sub_blk_sz; 139 totalSize -= sub_blk_sz; 140 } 141 /* 142 * remainder 143 */ 144 arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex); 145 if (*pResult > curBlkExtr) 146 { 147 curBlkExtr = *pResult; 148 curBlkPos = *pIndex; 149 curBlkIdx = curIdx; 150 } 151 *pIndex = curBlkIdx * sub_blk_sz + curBlkPos; 152 *pResult = curBlkExtr; 153 } 154 } 155 #else 156 void arm_max_q7( 157 const q7_t * pSrc, 158 uint32_t blockSize, 159 q7_t * pResult, 160 uint32_t * pIndex) 161 { 162 q7_t maxVal, out; /* Temporary variables to store the output value. */ 163 uint32_t blkCnt, outIndex; /* Loop counter */ 164 165 #if defined (ARM_MATH_LOOPUNROLL) 166 uint32_t index; /* index of maximum value */ 167 #endif 168 169 /* Initialise index value to zero. */ 170 outIndex = 0U; 171 /* Load first input value that act as reference value for comparision */ 172 out = *pSrc++; 173 174 #if defined (ARM_MATH_LOOPUNROLL) 175 /* Initialise index of maximum value. */ 176 index = 0U; 177 178 /* Loop unrolling: Compute 4 outputs at a time */ 179 blkCnt = (blockSize - 1U) >> 2U; 180 181 while (blkCnt > 0U) 182 { 183 /* Initialize maxVal to next consecutive values one by one */ 184 maxVal = *pSrc++; 185 186 /* compare for the maximum value */ 187 if (out < maxVal) 188 { 189 /* Update the maximum value and it's index */ 190 out = maxVal; 191 outIndex = index + 1U; 192 } 193 194 maxVal = *pSrc++; 195 if (out < maxVal) 196 { 197 out = maxVal; 198 outIndex = index + 2U; 199 } 200 201 maxVal = *pSrc++; 202 if (out < maxVal) 203 { 204 out = maxVal; 205 outIndex = index + 3U; 206 } 207 208 maxVal = *pSrc++; 209 if (out < maxVal) 210 { 211 out = maxVal; 212 outIndex = index + 4U; 213 } 214 215 index += 4U; 216 217 /* Decrement loop counter */ 218 blkCnt--; 219 } 220 221 /* Loop unrolling: Compute remaining outputs */ 222 blkCnt = (blockSize - 1U) % 4U; 223 224 #else 225 226 /* Initialize blkCnt with number of samples */ 227 blkCnt = (blockSize - 1U); 228 229 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 230 231 while (blkCnt > 0U) 232 { 233 /* Initialize maxVal to the next consecutive values one by one */ 234 maxVal = *pSrc++; 235 236 /* compare for the maximum value */ 237 if (out < maxVal) 238 { 239 /* Update the maximum value and it's index */ 240 out = maxVal; 241 outIndex = blockSize - blkCnt; 242 } 243 244 /* Decrement loop counter */ 245 blkCnt--; 246 } 247 248 /* Store the maximum value and it's index into destination pointers */ 249 *pResult = out; 250 *pIndex = outIndex; 251 } 252 #endif /* defined(ARM_MATH_MVEI) */ 253 254 /** 255 @} end of Max group 256 */