arm_scale_q7.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_scale_q7.c 4 * Description: Multiplies a Q7 vector by a scalar 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 /** 36 @addtogroup BasicScale 37 @{ 38 */ 39 40 /** 41 @brief Multiplies a Q7 vector by a scalar. 42 @param[in] pSrc points to the input vector 43 @param[in] scaleFract fractional portion of the scale value 44 @param[in] shift number of bits to shift the result by 45 @param[out] pDst points to the output vector 46 @param[in] blockSize number of samples in each vector 47 @return none 48 49 @par Scaling and Overflow Behavior 50 The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format. 51 These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format. 52 */ 53 54 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 55 56 #include "arm_helium_utils.h" 57 58 59 void arm_scale_q7( 60 const q7_t * pSrc, 61 q7_t scaleFract, 62 int8_t shift, 63 q7_t * pDst, 64 uint32_t blockSize) 65 { 66 uint32_t blkCnt; /* loop counters */ 67 q7x16_t vecSrc; 68 q7x16_t vecDst; 69 70 71 /* Compute 16 outputs at a time */ 72 blkCnt = blockSize >> 4; 73 74 while (blkCnt > 0U) 75 { 76 /* 77 * C = A * scale 78 * Scale the input and then store the result in the destination buffer. 79 */ 80 vecSrc = vld1q(pSrc); 81 vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract)); 82 vecDst = vqshlq_r(vecDst, shift + 1); 83 vst1q(pDst, vecDst); 84 /* 85 * Decrement the blockSize loop counter 86 */ 87 blkCnt--; 88 /* 89 * advance vector source and destination pointers 90 */ 91 pSrc += 16; 92 pDst += 16; 93 } 94 /* 95 * tail 96 */ 97 blkCnt = blockSize & 0xF; 98 if (blkCnt > 0U) 99 { 100 mve_pred16_t p0 = vctp8q(blkCnt); 101 vecSrc = vld1q(pSrc); 102 vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract)); 103 vecDst = vqshlq_r(vecDst, shift + 1); 104 vstrbq_p(pDst, vecDst, p0); 105 } 106 107 } 108 109 #else 110 void arm_scale_q7( 111 const q7_t * pSrc, 112 q7_t scaleFract, 113 int8_t shift, 114 q7_t * pDst, 115 uint32_t blockSize) 116 { 117 uint32_t blkCnt; /* Loop counter */ 118 int8_t kShift = 7 - shift; /* Shift to apply after scaling */ 119 120 #if defined (ARM_MATH_LOOPUNROLL) 121 122 #if defined (ARM_MATH_DSP) 123 q7_t in1, in2, in3, in4; /* Temporary input variables */ 124 q7_t out1, out2, out3, out4; /* Temporary output variables */ 125 #endif 126 127 /* Loop unrolling: Compute 4 outputs at a time */ 128 blkCnt = blockSize >> 2U; 129 130 while (blkCnt > 0U) 131 { 132 /* C = A * scale */ 133 134 #if defined (ARM_MATH_DSP) 135 /* Reading 4 inputs from memory */ 136 in1 = *pSrc++; 137 in2 = *pSrc++; 138 in3 = *pSrc++; 139 in4 = *pSrc++; 140 141 /* Scale inputs and store result in the temporary variable. */ 142 out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8)); 143 out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8)); 144 out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8)); 145 out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8)); 146 147 /* Pack and store result in destination buffer (in single write) */ 148 write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4)); 149 #else 150 *pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8)); 151 *pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8)); 152 *pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8)); 153 *pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8)); 154 #endif 155 156 /* Decrement loop counter */ 157 blkCnt--; 158 } 159 160 /* Loop unrolling: Compute remaining outputs */ 161 blkCnt = blockSize % 0x4U; 162 163 #else 164 165 /* Initialize blkCnt with number of samples */ 166 blkCnt = blockSize; 167 168 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 169 170 while (blkCnt > 0U) 171 { 172 /* C = A * scale */ 173 174 /* Scale input and store result in destination buffer. */ 175 *pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8)); 176 177 /* Decrement loop counter */ 178 blkCnt--; 179 } 180 181 } 182 #endif /* defined(ARM_MATH_MVEI) */ 183 184 /** 185 @} end of BasicScale group 186 */