/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_mean_q7.c
arm_mean_q7.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_mean_q7.c
  4   * Description:  Mean value of a Q7 vector
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  
 31  /**
 32    @ingroup groupStats
 33   */
 34  
 35  /**
 36    @addtogroup mean
 37    @{
 38   */
 39  
 40  /**
 41    @brief         Mean value of a Q7 vector.
 42    @param[in]     pSrc       points to the input vector
 43    @param[in]     blockSize  number of samples in input vector
 44    @param[out]    pResult    mean value returned here
 45    @return        none
 46  
 47    @par           Scaling and Overflow Behavior
 48                     The function is implemented using a 32-bit internal accumulator.
 49                     The input is represented in 1.7 format and is accumulated in a 32-bit
 50                     accumulator in 25.7 format.
                   Internal overflow cannot occur as long as blockSize does not exceed
                   2^24 samples, and the full precision of the intermediate result is preserved.
 53                     Finally, the accumulator is truncated to yield a result of 1.7 format.
 54   */
 55  
 56  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 57  
 58  void arm_mean_q7(
 59    const q7_t * pSrc,
 60          uint32_t blockSize,
 61          q7_t * pResult)
 62  {
 63      uint32_t  blkCnt;           /* loop counters */
 64      q7x16_t vecSrc;
 65      q31_t     sum = 0L;
 66  
 67  
 68      blkCnt = blockSize >> 4;
 69      while (blkCnt > 0U)
 70      {
 71          vecSrc = vldrbq_s8(pSrc);
 72          /*
 73           * sum lanes
 74           */
 75          sum = vaddvaq(sum, vecSrc);
 76  
 77          blkCnt--;
 78          pSrc += 16;
 79      }
 80  
 81      blkCnt = blockSize & 0xF;
 82      while (blkCnt > 0U)
 83      {
 84        /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
 85        sum += *pSrc++;
 86    
 87        /* Decrement loop counter */
 88        blkCnt--;
 89      }
 90  
 91      /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
 92      /* Store the result to the destination */
 93      *pResult = (q7_t) (sum / (int32_t) blockSize);
 94  }
 95  #else
 96  void arm_mean_q7(
 97    const q7_t * pSrc,
 98          uint32_t blockSize,
 99          q7_t * pResult)
100  {
101          uint32_t blkCnt;                               /* Loop counter */
102          q31_t sum = 0;                                 /* Temporary result storage */
103  
104  #if defined (ARM_MATH_LOOPUNROLL)
105          q31_t in;
106  #endif
107  
108  #if defined (ARM_MATH_LOOPUNROLL)
109  
110    /* Loop unrolling: Compute 4 outputs at a time */
111    blkCnt = blockSize >> 2U;
112  
113    while (blkCnt > 0U)
114    {
115      /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
116      in = read_q7x4_ia ((q7_t **) &pSrc);
117      sum += ((in << 24U) >> 24U);
118      sum += ((in << 16U) >> 24U);
119      sum += ((in <<  8U) >> 24U);
120      sum +=  (in >> 24U);
121  
122      /* Decrement the loop counter */
123      blkCnt--;
124    }
125  
126    /* Loop unrolling: Compute remaining outputs */
127    blkCnt = blockSize % 0x4U;
128  
129  #else
130  
131    /* Initialize blkCnt with number of samples */
132    blkCnt = blockSize;
133  
134  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
135  
136    while (blkCnt > 0U)
137    {
138      /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
139      sum += *pSrc++;
140  
141      /* Decrement loop counter */
142      blkCnt--;
143    }
144  
145    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
146    /* Store result to destination */
147    *pResult = (q7_t) (sum / (int32_t) blockSize);
148  }
149  #endif /* defined(ARM_MATH_MVEI) */
150  
151  /**
152    @} end of mean group
153   */