arm_max_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_max_f16.c 4 * Description: Maximum value of a floating-point vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) 34 #include <limits.h> 35 #endif 36 37 /** 38 @ingroup groupStats 39 */ 40 41 42 /** 43 @addtogroup Max 44 @{ 45 */ 46 47 /** 48 @brief Maximum value of a floating-point vector. 49 @param[in] pSrc points to the input vector 50 @param[in] blockSize number of samples in input vector 51 @param[out] pResult maximum value returned here 52 @param[out] pIndex index of maximum value returned here 53 @return none 54 */ 55 56 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 57 58 void arm_max_f16( 59 const float16_t * pSrc, 60 uint32_t blockSize, 61 float16_t * pResult, 62 uint32_t * pIndex) 63 { 64 int32_t blkCnt; 65 f16x8_t vecSrc; 66 f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN); 67 float16_t maxValue = F16_MIN; 68 uint32_t idx = blockSize; 69 uint16x8_t indexVec; 70 uint16x8_t curExtremIdxVec; 71 uint32_t curIdx = 0; 72 mve_pred16_t p0; 73 float16_t tmp; 74 75 76 indexVec = vidupq_wb_u16(&curIdx, 1); 77 curExtremIdxVec = vdupq_n_u16(0); 78 79 /* Compute 4 outputs at a time */ 80 blkCnt = blockSize >> 3; 81 while (blkCnt > 0) 82 { 83 vecSrc = vldrhq_f16(pSrc); 84 /* 85 * Get current max per lane and current index per lane 86 * when a max is selected 87 */ 88 p0 = vcmpgeq(vecSrc, curExtremValVec); 89 curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); 90 curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); 91 92 indexVec = vidupq_wb_u16(&curIdx, 1); 93 94 pSrc += 8; 95 /* Decrement the loop counter */ 96 blkCnt--; 97 } 98 99 100 /* 101 * Get max value across the vector 102 */ 103 maxValue = vmaxnmvq(maxValue, curExtremValVec); 104 /* 105 * set index for lower values to max possible index 106 */ 107 p0 = vcmpgeq(curExtremValVec, maxValue); 108 indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); 109 /* 110 * Get min index which is thus for a max value 111 */ 112 idx = vminvq(idx, indexVec); 113 114 /* Tail */ 115 blkCnt = blockSize & 7; 116 117 while (blkCnt > 0) 118 { 119 /* Initialize tmp to the next consecutive values one by one */ 120 tmp = *pSrc++; 121 122 /* compare for the maximum value */ 123 if (maxValue < tmp) 124 { 125 /* Update the maximum value and it's index */ 126 maxValue = tmp; 127 idx = blockSize - blkCnt; 128 } 129 130 /* Decrement loop counter */ 131 blkCnt--; 132 } 133 134 /* 135 * Save result 136 */ 137 *pIndex = idx; 138 *pResult = maxValue; 139 } 140 141 #else 142 void arm_max_f16( 143 const float16_t * pSrc, 144 uint32_t blockSize, 145 float16_t * pResult, 146 uint32_t * pIndex) 147 { 148 float16_t maxVal, out; /* Temporary variables to store the output value. */ 149 uint32_t blkCnt, outIndex; /* Loop counter */ 150 151 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 152 uint32_t index; /* index of maximum value */ 153 #endif 154 155 /* Initialise index value to zero. */ 156 outIndex = 0U; 157 158 /* Load first input value that act as reference value for comparision */ 159 out = *pSrc++; 160 161 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 162 /* Initialise index of maximum value. */ 163 index = 0U; 164 165 /* Loop unrolling: Compute 4 outputs at a time */ 166 blkCnt = (blockSize - 1U) >> 2U; 167 168 while (blkCnt > 0U) 169 { 170 /* Initialize maxVal to next consecutive values one by one */ 171 maxVal = *pSrc++; 172 173 /* compare for the maximum value */ 174 if (out < maxVal) 175 { 176 /* Update the maximum value and it's index */ 177 out = maxVal; 178 outIndex = index + 1U; 179 } 180 181 maxVal = *pSrc++; 182 if (out < maxVal) 183 { 184 out = maxVal; 185 outIndex = index + 2U; 186 } 187 188 maxVal = *pSrc++; 189 if (out < maxVal) 190 { 191 out = maxVal; 192 outIndex = index + 3U; 193 } 194 195 maxVal = *pSrc++; 196 if (out < maxVal) 197 { 198 out = maxVal; 199 outIndex = index + 4U; 200 } 201 202 index += 4U; 203 204 /* Decrement loop counter */ 205 blkCnt--; 206 } 207 208 /* Loop unrolling: Compute remaining outputs */ 209 blkCnt = (blockSize - 1U) % 4U; 210 211 #else 212 213 /* Initialize blkCnt with number of samples */ 214 blkCnt = (blockSize - 1U); 215 216 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 217 218 while (blkCnt > 0U) 219 { 220 /* Initialize maxVal to the next consecutive values one by one */ 221 maxVal = *pSrc++; 222 223 /* compare for the maximum value */ 224 if (out < maxVal) 225 { 226 /* Update the maximum value and it's index */ 227 out = maxVal; 228 outIndex = blockSize - blkCnt; 229 } 230 231 /* Decrement loop counter */ 232 blkCnt--; 233 } 234 235 /* Store the maximum value and it's index into destination pointers */ 236 *pResult = out; 237 *pIndex = outIndex; 238 } 239 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 240 241 /** 242 @} end of Max group 243 */ 244 245 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 246