/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_var_f32.c
arm_var_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_var_f32.c
  4   * Description:  Variance of the elements of a floating-point vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  
 31  /**
 32    @ingroup groupStats
 33   */
 34  
 35  /**
 36    @defgroup variance  Variance
 37  
 38    Calculates the variance of the elements in the input vector.
 39    The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
 40  
 41    <pre>
 42        Result = sum(element - meanOfElements)^2) / numElement - 1
 43  
 44        meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
 45    </pre>
 46  
 47    There are separate functions for floating point, Q31, and Q15 data types.
 48   */
 49  
 50  /**
 51    @addtogroup variance
 52    @{
 53   */
 54  
 55  /**
 56    @brief         Variance of the elements of a floating-point vector.
 57    @param[in]     pSrc       points to the input vector
 58    @param[in]     blockSize  number of samples in input vector
 59    @param[out]    pResult    variance value returned here
 60    @return        none
 61   */
 62  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 63  
 64  #include "arm_helium_utils.h"
 65  
 66  void arm_var_f32(
 67             const float32_t * pSrc,
 68                   uint32_t blockSize,
 69                   float32_t * pResult)
 70  {
 71      uint32_t         blkCnt;     /* loop counters */
 72      f32x4_t         vecSrc;
 73      f32x4_t         sumVec = vdupq_n_f32(0.0f);
 74      float32_t       fMean;
 75      float32_t sum = 0.0f;                          /* accumulator */
 76      float32_t in;                                  /* Temporary variable to store input value */
 77  
 78      if (blockSize <= 1U) {
 79          *pResult = 0;
 80          return;
 81      }
 82  
 83      arm_mean_f32(pSrc, blockSize, &fMean);
 84  
 85      /* Compute 4 outputs at a time */
 86      blkCnt = blockSize >> 2U;
 87      while (blkCnt > 0U)
 88      {
 89  
 90          vecSrc = vldrwq_f32(pSrc);
 91          /*
 92           * sum lanes
 93           */
 94          vecSrc = vsubq(vecSrc, fMean);
 95          sumVec = vfmaq(sumVec, vecSrc, vecSrc);
 96  
 97          blkCnt --;
 98          pSrc += 4;
 99      }
100  
101      sum = vecAddAcrossF32Mve(sumVec);
102  
103      /*
104       * tail
105       */
106      blkCnt = blockSize & 0x3;
107      while (blkCnt > 0U)
108      {
109         in = *pSrc++ - fMean;
110         sum += in * in;
111  
112         /* Decrement loop counter */
113         blkCnt--;
114      }
115     
116      /* Variance */
117      *pResult = sum / (float32_t) (blockSize - 1);
118  }
119  #else
120  #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
121  void arm_var_f32(
122             const float32_t * pSrc,
123                   uint32_t blockSize,
124                   float32_t * pResult)
125  {
126    float32_t mean;
127  
128    float32_t sum = 0.0f;                          /* accumulator */
129    float32_t in;                                  /* Temporary variable to store input value */
130    uint32_t blkCnt;                               /* loop counter */
131  
132    float32x4_t sumV = vdupq_n_f32(0.0f);                          /* Temporary result storage */
133    float32x2_t sumV2;
134    float32x4_t inV;
135    float32x4_t avg;
136  
137    arm_mean_f32(pSrc,blockSize,&mean);
138    avg = vdupq_n_f32(mean);
139  
140    blkCnt = blockSize >> 2U;
141  
142    /* Compute 4 outputs at a time.
143     ** a second loop below computes the remaining 1 to 3 samples. */
144    while (blkCnt > 0U)
145    {
146      /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
147      /* Compute Power and then store the result in a temporary variable, sum. */
148      inV = vld1q_f32(pSrc);
149      inV = vsubq_f32(inV, avg);
150      sumV = vmlaq_f32(sumV, inV, inV);
151      pSrc += 4;
152  
153      /* Decrement the loop counter */
154      blkCnt--;
155    }
156  
157    sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
158    sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
159  
160    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
161     ** No loop unrolling is used. */
162    blkCnt = blockSize % 0x4U;
163  
164    while (blkCnt > 0U)
165    {
166      /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
167      /* compute power and then store the result in a temporary variable, sum. */
168      in = *pSrc++;
169      in = in - mean;
170      sum += in * in;
171  
172      /* Decrement the loop counter */
173      blkCnt--;
174    }
175  
176    /* Variance */
177    *pResult = sum / (float32_t)(blockSize - 1.0f);
178  
179  }
180  
181  #else
182  void arm_var_f32(
183    const float32_t * pSrc,
184          uint32_t blockSize,
185          float32_t * pResult)
186  {
187          uint32_t blkCnt;                               /* Loop counter */
188          float32_t sum = 0.0f;                          /* Temporary result storage */
189          float32_t fSum = 0.0f;
190          float32_t fMean, fValue;
191    const float32_t * pInput = pSrc;
192  
193    if (blockSize <= 1U)
194    {
195      *pResult = 0;
196      return;
197    }
198  
199  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
200  
201    /* Loop unrolling: Compute 4 outputs at a time */
202    blkCnt = blockSize >> 2U;
203  
204    while (blkCnt > 0U)
205    {
206      /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
207  
208      sum += *pInput++;
209      sum += *pInput++;
210      sum += *pInput++;
211      sum += *pInput++;
212  
213  
214      /* Decrement loop counter */
215      blkCnt--;
216    }
217  
218    /* Loop unrolling: Compute remaining outputs */
219    blkCnt = blockSize % 0x4U;
220  
221  #else
222  
223    /* Initialize blkCnt with number of samples */
224    blkCnt = blockSize;
225  
226  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
227  
228    while (blkCnt > 0U)
229    {
230      /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
231  
232      sum += *pInput++;
233  
234      /* Decrement loop counter */
235      blkCnt--;
236    }
237  
238    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
239    fMean = sum / (float32_t) blockSize;
240  
241    pInput = pSrc;
242  
243  #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
244  
245    /* Loop unrolling: Compute 4 outputs at a time */
246    blkCnt = blockSize >> 2U;
247  
248    while (blkCnt > 0U)
249    {
250      fValue = *pInput++ - fMean;
251      fSum += fValue * fValue;
252  
253      fValue = *pInput++ - fMean;
254      fSum += fValue * fValue;
255  
256      fValue = *pInput++ - fMean;
257      fSum += fValue * fValue;
258  
259      fValue = *pInput++ - fMean;
260      fSum += fValue * fValue;
261  
262      /* Decrement loop counter */
263      blkCnt--;
264    }
265  
266    /* Loop unrolling: Compute remaining outputs */
267    blkCnt = blockSize % 0x4U;
268  
269  #else
270  
271    /* Initialize blkCnt with number of samples */
272    blkCnt = blockSize;
273  
274  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
275  
276    while (blkCnt > 0U)
277    {
278      fValue = *pInput++ - fMean;
279      fSum += fValue * fValue;
280  
281      /* Decrement loop counter */
282      blkCnt--;
283    }
284  
285    /* Variance */
286    *pResult = fSum / (float32_t)(blockSize - 1.0f);
287  }
288  #endif /* #if defined(ARM_MATH_NEON) */
289  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
290  
291  /**
292    @} end of variance group
293   */