/* arm_max_q31.c */
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_max_q31.c 4 * Description: Maximum value of a Q31 vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions.h" 30 31 /** 32 @ingroup groupStats 33 */ 34 35 /** 36 @addtogroup Max 37 @{ 38 */ 39 40 /** 41 @brief Maximum value of a Q31 vector. 
42 @param[in] pSrc points to the input vector 43 @param[in] blockSize number of samples in input vector 44 @param[out] pResult maximum value returned here 45 @param[out] pIndex index of maximum value returned here 46 @return none 47 */ 48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 49 50 #include "arm_helium_utils.h" 51 52 void arm_max_q31( 53 const q31_t * pSrc, 54 uint32_t blockSize, 55 q31_t * pResult, 56 uint32_t * pIndex) 57 { 58 int32_t blkCnt; /* loop counters */ 59 q31x4_t extremValVec = vdupq_n_s32(Q31_MIN); 60 q31_t maxValue = Q31_MIN; 61 uint32x4_t indexVec; 62 uint32x4_t extremIdxVec; 63 mve_pred16_t p0; 64 uint32_t extremIdxArr[4]; 65 66 indexVec = vidupq_u32(0U, 1); 67 68 blkCnt = blockSize; 69 do { 70 mve_pred16_t p = vctp32q(blkCnt); 71 q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); 72 /* 73 * Get current max per lane and current index per lane 74 * when a max is selected 75 */ 76 p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); 77 78 extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); 79 /* store per-lane extrema indexes */ 80 vst1q_p_u32(extremIdxArr, indexVec, p0); 81 82 indexVec += 4; 83 pSrc += 4; 84 blkCnt -= 4; 85 } 86 while (blkCnt > 0); 87 88 89 /* Get max value across the vector */ 90 maxValue = vmaxvq(maxValue, extremValVec); 91 92 /* set index for lower values to max possible index */ 93 p0 = vcmpgeq(extremValVec, maxValue); 94 extremIdxVec = vld1q_u32(extremIdxArr); 95 96 indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); 97 *pIndex = vminvq(blockSize - 1, indexVec); 98 *pResult = maxValue; 99 } 100 101 #else 102 void arm_max_q31( 103 const q31_t * pSrc, 104 uint32_t blockSize, 105 q31_t * pResult, 106 uint32_t * pIndex) 107 { 108 q31_t maxVal, out; /* Temporary variables to store the output value. 
*/ 109 uint32_t blkCnt, outIndex; /* Loop counter */ 110 111 #if defined (ARM_MATH_LOOPUNROLL) 112 uint32_t index; /* index of maximum value */ 113 #endif 114 115 /* Initialise index value to zero. */ 116 outIndex = 0U; 117 /* Load first input value that act as reference value for comparision */ 118 out = *pSrc++; 119 120 #if defined (ARM_MATH_LOOPUNROLL) 121 /* Initialise index of maximum value. */ 122 index = 0U; 123 124 /* Loop unrolling: Compute 4 outputs at a time */ 125 blkCnt = (blockSize - 1U) >> 2U; 126 127 while (blkCnt > 0U) 128 { 129 /* Initialize maxVal to next consecutive values one by one */ 130 maxVal = *pSrc++; 131 132 /* compare for the maximum value */ 133 if (out < maxVal) 134 { 135 /* Update the maximum value and it's index */ 136 out = maxVal; 137 outIndex = index + 1U; 138 } 139 140 maxVal = *pSrc++; 141 if (out < maxVal) 142 { 143 out = maxVal; 144 outIndex = index + 2U; 145 } 146 147 maxVal = *pSrc++; 148 if (out < maxVal) 149 { 150 out = maxVal; 151 outIndex = index + 3U; 152 } 153 154 maxVal = *pSrc++; 155 if (out < maxVal) 156 { 157 out = maxVal; 158 outIndex = index + 4U; 159 } 160 161 index += 4U; 162 163 /* Decrement loop counter */ 164 blkCnt--; 165 } 166 167 /* Loop unrolling: Compute remaining outputs */ 168 blkCnt = (blockSize - 1U) % 4U; 169 170 #else 171 172 /* Initialize blkCnt with number of samples */ 173 blkCnt = (blockSize - 1U); 174 175 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 176 177 while (blkCnt > 0U) 178 { 179 /* Initialize maxVal to the next consecutive values one by one */ 180 maxVal = *pSrc++; 181 182 /* compare for the maximum value */ 183 if (out < maxVal) 184 { 185 /* Update the maximum value and it's index */ 186 out = maxVal; 187 outIndex = blockSize - blkCnt; 188 } 189 190 /* Decrement loop counter */ 191 blkCnt--; 192 } 193 194 /* Store the maximum value and it's index into destination pointers */ 195 *pResult = out; 196 *pIndex = outIndex; 197 } 198 #endif /* defined(ARM_MATH_MVEI) */ 199 200 /** 201 
@} end of Max group 202 */