arm_scale_q31.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_scale_q31.c 4 * Description: Multiplies a Q31 vector by a scalar 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 /** 36 @addtogroup BasicScale 37 @{ 38 */ 39 40 /** 41 @brief Multiplies a Q31 vector by a scalar. 42 @param[in] pSrc points to the input vector 43 @param[in] scaleFract fractional portion of the scale value 44 @param[in] shift number of bits to shift the result by 45 @param[out] pDst points to the output vector 46 @param[in] blockSize number of samples in each vector 47 @return none 48 49 @par Scaling and Overflow Behavior 50 The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format. 51 These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format. 52 */ 53 54 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 55 56 #include "arm_helium_utils.h" 57 58 void arm_scale_q31( 59 const q31_t * pSrc, 60 q31_t scaleFract, 61 int8_t shift, 62 q31_t * pDst, 63 uint32_t blockSize) 64 { 65 uint32_t blkCnt; /* loop counters */ 66 q31x4_t vecSrc; 67 q31x4_t vecDst; 68 69 /* Compute 4 outputs at a time */ 70 blkCnt = blockSize >> 2; 71 while (blkCnt > 0U) 72 { 73 /* 74 * C = A * scale 75 * Scale the input and then store the result in the destination buffer. 76 */ 77 vecSrc = vld1q(pSrc); 78 vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract)); 79 vecDst = vqshlq_r(vecDst, shift + 1); 80 vst1q(pDst, vecDst); 81 /* 82 * Decrement the blockSize loop counter 83 */ 84 blkCnt--; 85 /* 86 * advance vector source and destination pointers 87 */ 88 pSrc += 4; 89 pDst += 4; 90 } 91 /* 92 * tail 93 */ 94 blkCnt = blockSize & 3; 95 if (blkCnt > 0U) 96 { 97 mve_pred16_t p0 = vctp32q(blkCnt); 98 vecSrc = vld1q(pSrc); 99 vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract)); 100 vecDst = vqshlq_r(vecDst, shift + 1); 101 vstrwq_p(pDst, vecDst, p0); 102 } 103 } 104 105 #else 106 void arm_scale_q31( 107 const q31_t *pSrc, 108 q31_t scaleFract, 109 int8_t shift, 110 q31_t *pDst, 111 uint32_t blockSize) 112 { 113 uint32_t blkCnt; /* Loop counter */ 114 q31_t in, out; /* Temporary variables */ 115 int8_t kShift = shift + 1; /* Shift to apply after scaling */ 116 int8_t sign = (kShift & 0x80); 117 118 #if defined (ARM_MATH_LOOPUNROLL) 119 120 /* Loop unrolling: Compute 4 outputs at a time */ 121 blkCnt = blockSize >> 2U; 122 123 if (sign == 0U) 124 { 125 while (blkCnt > 0U) 126 { 127 /* C = A * scale */ 128 129 /* Scale input and store result in destination buffer. */ 130 in = *pSrc++; /* read input from source */ 131 in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */ 132 out = in << kShift; /* apply shifting */ 133 if (in != (out >> kShift)) /* saturate the result */ 134 out = 0x7FFFFFFF ^ (in >> 31); 135 *pDst++ = out; /* Store result destination */ 136 137 in = *pSrc++; 138 in = ((q63_t) in * scaleFract) >> 32; 139 out = in << kShift; 140 if (in != (out >> kShift)) 141 out = 0x7FFFFFFF ^ (in >> 31); 142 *pDst++ = out; 143 144 in = *pSrc++; 145 in = ((q63_t) in * scaleFract) >> 32; 146 out = in << kShift; 147 if (in != (out >> kShift)) 148 out = 0x7FFFFFFF ^ (in >> 31); 149 *pDst++ = out; 150 151 in = *pSrc++; 152 in = ((q63_t) in * scaleFract) >> 32; 153 out = in << kShift; 154 if (in != (out >> kShift)) 155 out = 0x7FFFFFFF ^ (in >> 31); 156 *pDst++ = out; 157 158 /* Decrement loop counter */ 159 blkCnt--; 160 } 161 } 162 else 163 { 164 while (blkCnt > 0U) 165 { 166 /* C = A * scale */ 167 168 /* Scale input and store result in destination buffer. */ 169 in = *pSrc++; /* read four inputs from source */ 170 in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */ 171 out = in >> -kShift; /* apply shifting */ 172 *pDst++ = out; /* Store result destination */ 173 174 in = *pSrc++; 175 in = ((q63_t) in * scaleFract) >> 32; 176 out = in >> -kShift; 177 *pDst++ = out; 178 179 in = *pSrc++; 180 in = ((q63_t) in * scaleFract) >> 32; 181 out = in >> -kShift; 182 *pDst++ = out; 183 184 in = *pSrc++; 185 in = ((q63_t) in * scaleFract) >> 32; 186 out = in >> -kShift; 187 *pDst++ = out; 188 189 /* Decrement loop counter */ 190 blkCnt--; 191 } 192 } 193 194 /* Loop unrolling: Compute remaining outputs */ 195 blkCnt = blockSize % 0x4U; 196 197 #else 198 199 /* Initialize blkCnt with number of samples */ 200 blkCnt = blockSize; 201 202 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 203 204 if (sign == 0U) 205 { 206 while (blkCnt > 0U) 207 { 208 /* C = A * scale */ 209 210 /* Scale input and store result in destination buffer. */ 211 in = *pSrc++; 212 in = ((q63_t) in * scaleFract) >> 32; 213 out = in << kShift; 214 if (in != (out >> kShift)) 215 out = 0x7FFFFFFF ^ (in >> 31); 216 *pDst++ = out; 217 218 /* Decrement loop counter */ 219 blkCnt--; 220 } 221 } 222 else 223 { 224 while (blkCnt > 0U) 225 { 226 /* C = A * scale */ 227 228 /* Scale input and store result in destination buffer. */ 229 in = *pSrc++; 230 in = ((q63_t) in * scaleFract) >> 32; 231 out = in >> -kShift; 232 *pDst++ = out; 233 234 /* Decrement loop counter */ 235 blkCnt--; 236 } 237 } 238 239 } 240 #endif /* defined(ARM_MATH_MVEI) */ 241 242 /** 243 @} end of BasicScale group 244 */