arm_mse_q15.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_mse_q15.c 4 * Description: Mean square error between two Q15 vectors 5 * 6 * $Date: 04 April 2022 7 * $Revision: V1.10.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/statistics_functions.h" 30 31 /** 32 @ingroup groupStats 33 */ 34 35 36 /** 37 @addtogroup MSE 38 @{ 39 */ 40 41 /** 42 @brief Mean square error between two Q15 vectors. 43 @param[in] pSrcA points to the first input vector 44 @param[in] pSrcB points to the second input vector 45 @param[in] blockSize number of samples in input vector 46 @param[out] pResult mean square error 47 @return none 48 */ 49 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 50 void arm_mse_q15( 51 const q15_t * pSrcA, 52 const q15_t * pSrcB, 53 uint32_t blockSize, 54 q15_t * pResult) 55 { 56 uint32_t blkCnt; /* loop counters */ 57 q15x8_t vecSrcA,vecSrcB; 58 q63_t sum = 0LL; 59 60 blkCnt = blockSize >> 3U; 61 while (blkCnt > 0U) 62 { 63 vecSrcA = vld1q(pSrcA); 64 vecSrcB = vld1q(pSrcB); 65 66 vecSrcA = vshrq(vecSrcA,1); 67 vecSrcB = vshrq(vecSrcB,1); 68 69 vecSrcA = vqsubq(vecSrcA,vecSrcB); 70 /* 71 * sum lanes 72 */ 73 sum = vmlaldavaq(sum, vecSrcA, vecSrcA); 74 75 blkCnt--; 76 pSrcA += 8; 77 pSrcB += 8; 78 } 79 80 /* 81 * tail 82 */ 83 blkCnt = blockSize & 7; 84 if (blkCnt > 0U) 85 { 86 mve_pred16_t p0 = vctp16q(blkCnt); 87 vecSrcA = vld1q(pSrcA); 88 vecSrcB = vld1q(pSrcB); 89 90 vecSrcA = vshrq(vecSrcA,1); 91 vecSrcB = vshrq(vecSrcB,1); 92 93 vecSrcA = vqsubq(vecSrcA,vecSrcB); 94 95 sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0); 96 } 97 98 99 100 *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16); 101 } 102 #else 103 void arm_mse_q15( 104 const q15_t * pSrcA, 105 const q15_t * pSrcB, 106 uint32_t blockSize, 107 q15_t * pResult) 108 { 109 uint32_t blkCnt; /* Loop counter */ 110 q63_t sum = 0; /* Temporary result storage */ 111 q15_t inA,inB; /* Temporary variable to store input value */ 112 113 114 #if defined (ARM_MATH_LOOPUNROLL) 115 116 /* Loop unrolling: Compute 4 outputs at a time */ 117 blkCnt = blockSize >> 2U; 118 119 while (blkCnt > 0U) 120 { 121 122 inA = *pSrcA++ >> 1; 123 inB = *pSrcB++ >> 1; 124 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); 125 sum += (q63_t)((q31_t) inA * inA); 126 127 inA = *pSrcA++ >> 1; 128 inB = *pSrcB++ >> 1; 129 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); 130 sum += (q63_t)((q31_t) inA * inA); 131 132 inA = *pSrcA++ >> 1; 133 inB = *pSrcB++ >> 1; 134 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); 135 sum += (q63_t)((q31_t) inA * inA); 136 137 inA = *pSrcA++ >> 1; 138 inB = *pSrcB++ >> 1; 139 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); 140 sum += (q63_t)((q31_t) inA * inA); 141 142 /* Decrement loop counter */ 143 blkCnt--; 144 } 145 146 /* Loop unrolling: Compute remaining outputs */ 147 blkCnt = blockSize % 0x4U; 148 149 #else 150 151 /* Initialize blkCnt with number of samples */ 152 blkCnt = blockSize; 153 154 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 155 156 while (blkCnt > 0U) 157 { 158 159 inA = *pSrcA++ >> 1; 160 inB = *pSrcB++ >> 1; 161 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); 162 sum += (q63_t)((q31_t) inA * inA); 163 164 /* Decrement loop counter */ 165 blkCnt--; 166 } 167 168 /* Store result in q15 format */ 169 *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16); 170 } 171 #endif /* defined(ARM_MATH_MVEI) */ 172 173 /** 174 @} end of MSE group 175 */