arm_var_f32.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_var_f32.c 4 * Description: Variance of the elements of a floating-point vector 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions.h" 30 31 /** 32 @ingroup groupStats 33 */ 34 35 /** 36 @defgroup variance Variance 37 38 Calculates the variance of the elements in the input vector. 39 The underlying algorithm used is the direct method sometimes referred to as the two-pass method: 40 41 <pre> 42 Result = sum(element - meanOfElements)^2) / numElement - 1 43 44 meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize 45 </pre> 46 47 There are separate functions for floating point, Q31, and Q15 data types. 48 */ 49 50 /** 51 @addtogroup variance 52 @{ 53 */ 54 55 /** 56 @brief Variance of the elements of a floating-point vector. 57 @param[in] pSrc points to the input vector 58 @param[in] blockSize number of samples in input vector 59 @param[out] pResult variance value returned here 60 @return none 61 */ 62 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) 63 64 #include "arm_helium_utils.h" 65 66 void arm_var_f32( 67 const float32_t * pSrc, 68 uint32_t blockSize, 69 float32_t * pResult) 70 { 71 uint32_t blkCnt; /* loop counters */ 72 f32x4_t vecSrc; 73 f32x4_t sumVec = vdupq_n_f32(0.0f); 74 float32_t fMean; 75 float32_t sum = 0.0f; /* accumulator */ 76 float32_t in; /* Temporary variable to store input value */ 77 78 if (blockSize <= 1U) { 79 *pResult = 0; 80 return; 81 } 82 83 arm_mean_f32(pSrc, blockSize, &fMean); 84 85 /* Compute 4 outputs at a time */ 86 blkCnt = blockSize >> 2U; 87 while (blkCnt > 0U) 88 { 89 90 vecSrc = vldrwq_f32(pSrc); 91 /* 92 * sum lanes 93 */ 94 vecSrc = vsubq(vecSrc, fMean); 95 sumVec = vfmaq(sumVec, vecSrc, vecSrc); 96 97 blkCnt --; 98 pSrc += 4; 99 } 100 101 sum = vecAddAcrossF32Mve(sumVec); 102 103 /* 104 * tail 105 */ 106 blkCnt = blockSize & 0x3; 107 while (blkCnt > 0U) 108 { 109 in = *pSrc++ - fMean; 110 sum += in * in; 111 112 /* Decrement loop counter */ 113 blkCnt--; 114 } 115 116 /* Variance */ 117 *pResult = sum / (float32_t) (blockSize - 1); 118 } 119 #else 120 #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) 121 void arm_var_f32( 122 const float32_t * pSrc, 123 uint32_t blockSize, 124 float32_t * pResult) 125 { 126 float32_t mean; 127 128 float32_t sum = 0.0f; /* accumulator */ 129 float32_t in; /* Temporary variable to store input value */ 130 uint32_t blkCnt; /* loop counter */ 131 132 float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ 133 float32x2_t sumV2; 134 float32x4_t inV; 135 float32x4_t avg; 136 137 arm_mean_f32(pSrc,blockSize,&mean); 138 avg = vdupq_n_f32(mean); 139 140 blkCnt = blockSize >> 2U; 141 142 /* Compute 4 outputs at a time. 143 ** a second loop below computes the remaining 1 to 3 samples. */ 144 while (blkCnt > 0U) 145 { 146 /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ 147 /* Compute Power and then store the result in a temporary variable, sum. */ 148 inV = vld1q_f32(pSrc); 149 inV = vsubq_f32(inV, avg); 150 sumV = vmlaq_f32(sumV, inV, inV); 151 pSrc += 4; 152 153 /* Decrement the loop counter */ 154 blkCnt--; 155 } 156 157 sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); 158 sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); 159 160 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 161 ** No loop unrolling is used. */ 162 blkCnt = blockSize % 0x4U; 163 164 while (blkCnt > 0U) 165 { 166 /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ 167 /* compute power and then store the result in a temporary variable, sum. */ 168 in = *pSrc++; 169 in = in - mean; 170 sum += in * in; 171 172 /* Decrement the loop counter */ 173 blkCnt--; 174 } 175 176 /* Variance */ 177 *pResult = sum / (float32_t)(blockSize - 1.0f); 178 179 } 180 181 #else 182 void arm_var_f32( 183 const float32_t * pSrc, 184 uint32_t blockSize, 185 float32_t * pResult) 186 { 187 uint32_t blkCnt; /* Loop counter */ 188 float32_t sum = 0.0f; /* Temporary result storage */ 189 float32_t fSum = 0.0f; 190 float32_t fMean, fValue; 191 const float32_t * pInput = pSrc; 192 193 if (blockSize <= 1U) 194 { 195 *pResult = 0; 196 return; 197 } 198 199 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 200 201 /* Loop unrolling: Compute 4 outputs at a time */ 202 blkCnt = blockSize >> 2U; 203 204 while (blkCnt > 0U) 205 { 206 /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 207 208 sum += *pInput++; 209 sum += *pInput++; 210 sum += *pInput++; 211 sum += *pInput++; 212 213 214 /* Decrement loop counter */ 215 blkCnt--; 216 } 217 218 /* Loop unrolling: Compute remaining outputs */ 219 blkCnt = blockSize % 0x4U; 220 221 #else 222 223 /* Initialize blkCnt with number of samples */ 224 blkCnt = blockSize; 225 226 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 227 228 while (blkCnt > 0U) 229 { 230 /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 231 232 sum += *pInput++; 233 234 /* Decrement loop counter */ 235 blkCnt--; 236 } 237 238 /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ 239 fMean = sum / (float32_t) blockSize; 240 241 pInput = pSrc; 242 243 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) 244 245 /* Loop unrolling: Compute 4 outputs at a time */ 246 blkCnt = blockSize >> 2U; 247 248 while (blkCnt > 0U) 249 { 250 fValue = *pInput++ - fMean; 251 fSum += fValue * fValue; 252 253 fValue = *pInput++ - fMean; 254 fSum += fValue * fValue; 255 256 fValue = *pInput++ - fMean; 257 fSum += fValue * fValue; 258 259 fValue = *pInput++ - fMean; 260 fSum += fValue * fValue; 261 262 /* Decrement loop counter */ 263 blkCnt--; 264 } 265 266 /* Loop unrolling: Compute remaining outputs */ 267 blkCnt = blockSize % 0x4U; 268 269 #else 270 271 /* Initialize blkCnt with number of samples */ 272 blkCnt = blockSize; 273 274 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 275 276 while (blkCnt > 0U) 277 { 278 fValue = *pInput++ - fMean; 279 fSum += fValue * fValue; 280 281 /* Decrement loop counter */ 282 blkCnt--; 283 } 284 285 /* Variance */ 286 *pResult = fSum / (float32_t)(blockSize - 1.0f); 287 } 288 #endif /* #if defined(ARM_MATH_NEON) */ 289 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 290 291 /** 292 @} end of variance group 293 */