/ Drivers / CMSIS / DSP / Source / StatisticsFunctions / arm_mse_q15.c
arm_mse_q15.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_mse_q15.c
  4   * Description:  Mean square error between two Q15 vectors
  5   *
  6   * $Date:        04 April 2022
  7   * $Revision:    V1.10.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/statistics_functions.h"
 30  
 31  /**
 32    @ingroup groupStats
 33   */
 34  
 35  
 36  /**
 37    @addtogroup MSE
 38    @{
 39   */
 40  
 41  /**
 42    @brief         Mean square error between two Q15 vectors.
 43    @param[in]     pSrcA       points to the first input vector
 44    @param[in]     pSrcB       points to the second input vector
 45    @param[in]     blockSize   number of samples in input vector
 46    @param[out]    pResult     mean square error
 47    @return        none
 48   */
 49  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 50  void arm_mse_q15(
 51    const q15_t * pSrcA,
 52    const q15_t * pSrcB,
 53          uint32_t blockSize,
 54          q15_t * pResult)
 55  {
 56      uint32_t  blkCnt;           /* loop counters */
 57      q15x8_t vecSrcA,vecSrcB;
 58      q63_t   sum = 0LL;
 59  
 60      blkCnt = blockSize >> 3U;
 61      while (blkCnt > 0U)
 62      {
 63          vecSrcA = vld1q(pSrcA);
 64          vecSrcB = vld1q(pSrcB);
 65  
 66          vecSrcA = vshrq(vecSrcA,1);
 67          vecSrcB = vshrq(vecSrcB,1);
 68  
 69          vecSrcA = vqsubq(vecSrcA,vecSrcB);
 70          /*
 71           * sum lanes
 72           */
 73          sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
 74  
 75          blkCnt--;
 76          pSrcA += 8;
 77          pSrcB += 8;
 78      }
 79  
 80      /*
 81       * tail
 82       */
 83      blkCnt = blockSize & 7;
 84      if (blkCnt > 0U)
 85      {
 86          mve_pred16_t p0 = vctp16q(blkCnt);
 87          vecSrcA = vld1q(pSrcA);
 88          vecSrcB = vld1q(pSrcB);
 89  
 90          vecSrcA = vshrq(vecSrcA,1);
 91          vecSrcB = vshrq(vecSrcB,1);
 92  
 93          vecSrcA = vqsubq(vecSrcA,vecSrcB);
 94  
 95          sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
 96      }
 97  
 98      
 99  
100      *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
101  }
102  #else
103  void arm_mse_q15(
104    const q15_t * pSrcA,
105    const q15_t * pSrcB,
106          uint32_t blockSize,
107          q15_t * pResult)
108  {
109          uint32_t blkCnt;                               /* Loop counter */
110          q63_t sum = 0;                                 /* Temporary result storage */
111          q15_t inA,inB;                                       /* Temporary variable to store input value */
112  
113  
114  #if defined (ARM_MATH_LOOPUNROLL)
115  
116    /* Loop unrolling: Compute 4 outputs at a time */
117    blkCnt = blockSize >> 2U;
118  
119    while (blkCnt > 0U)
120    {
121  
122      inA = *pSrcA++ >> 1;
123      inB = *pSrcB++ >> 1;
124      inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
125      sum += (q63_t)((q31_t) inA * inA);
126  
127      inA = *pSrcA++ >> 1;
128      inB = *pSrcB++ >> 1;
129      inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
130      sum += (q63_t)((q31_t) inA * inA);
131  
132      inA = *pSrcA++ >> 1;
133      inB = *pSrcB++ >> 1;
134      inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
135      sum += (q63_t)((q31_t) inA * inA);
136  
137      inA = *pSrcA++ >> 1;
138      inB = *pSrcB++ >> 1;
139      inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
140      sum += (q63_t)((q31_t) inA * inA);
141  
142      /* Decrement loop counter */
143      blkCnt--;
144    }
145  
146    /* Loop unrolling: Compute remaining outputs */
147    blkCnt = blockSize % 0x4U;
148  
149  #else
150  
151    /* Initialize blkCnt with number of samples */
152    blkCnt = blockSize;
153  
154  #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
155  
156    while (blkCnt > 0U)
157    {
158  
159      inA = *pSrcA++ >> 1;
160      inB = *pSrcB++ >> 1;
161      inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
162      sum += (q63_t)((q31_t) inA * inA);
163  
164      /* Decrement loop counter */
165      blkCnt--;
166    }
167  
168    /* Store result in q15 format */
169    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
170  }
171  #endif /* defined(ARM_MATH_MVEI) */
172  
173  /**
174    @} end of MSE group
175   */