arm_shift_q7.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_shift_q7.c 4 * Description: Processing function for the Q7 Shifting 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 /** 36 @addtogroup BasicShift 37 @{ 38 */ 39 40 /** 41 @brief Shifts the elements of a Q7 vector a specified number of bits 42 @param[in] pSrc points to the input vector 43 @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. 44 @param[out] pDst points to the output vector 45 @param[in] blockSize number of samples in each vector 46 @return none 47 48 @par onditions for optimum performance 49 Input and output buffers should be aligned by 32-bit 50 @par Scaling and Overflow Behavior 51 The function uses saturating arithmetic. 52 Results outside of the allowable Q7 range [0x80 0x7F] are saturated. 53 */ 54 55 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 56 57 #include "arm_helium_utils.h" 58 59 void arm_shift_q7( 60 const q7_t * pSrc, 61 int8_t shiftBits, 62 q7_t * pDst, 63 uint32_t blockSize) 64 { 65 uint32_t blkCnt; /* loop counters */ 66 q7x16_t vecSrc; 67 q7x16_t vecDst; 68 69 /* Compute 16 outputs at a time */ 70 blkCnt = blockSize >> 4; 71 while (blkCnt > 0U) 72 { 73 /* 74 * C = A (>> or <<) shiftBits 75 * Shift the input and then store the result in the destination buffer. 76 */ 77 vecSrc = vld1q(pSrc); 78 vecDst = vqshlq_r(vecSrc, shiftBits); 79 vst1q(pDst, vecDst); 80 /* 81 * Decrement the blockSize loop counter 82 */ 83 blkCnt--; 84 /* 85 * advance vector source and destination pointers 86 */ 87 pSrc += 16; 88 pDst += 16; 89 } 90 /* 91 * tail 92 */ 93 blkCnt = blockSize & 0xF; 94 if (blkCnt > 0U) 95 { 96 mve_pred16_t p0 = vctp8q(blkCnt); 97 vecSrc = vld1q(pSrc); 98 vecDst = vqshlq_r(vecSrc, shiftBits); 99 vstrbq_p(pDst, vecDst, p0); 100 } 101 } 102 103 #else 104 void arm_shift_q7( 105 const q7_t * pSrc, 106 int8_t shiftBits, 107 q7_t * pDst, 108 uint32_t blockSize) 109 { 110 uint32_t blkCnt; /* Loop counter */ 111 uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */ 112 113 #if defined (ARM_MATH_LOOPUNROLL) 114 115 #if defined (ARM_MATH_DSP) 116 q7_t in1, in2, in3, in4; /* Temporary input variables */ 117 #endif 118 119 /* Loop unrolling: Compute 4 outputs at a time */ 120 blkCnt = blockSize >> 2U; 121 122 /* If the shift value is positive then do right shift else left shift */ 123 if (sign == 0U) 124 { 125 while (blkCnt > 0U) 126 { 127 /* C = A << shiftBits */ 128 129 #if defined (ARM_MATH_DSP) 130 /* Read 4 inputs */ 131 in1 = *pSrc++; 132 in2 = *pSrc++; 133 in3 = *pSrc++; 134 in4 = *pSrc++; 135 136 /* Pack and store result in destination buffer (in single write) */ 137 write_q7x4_ia (&pDst, __PACKq7(__SSAT(((q15_t) in1 << shiftBits), 8), 138 __SSAT(((q15_t) in2 << shiftBits), 8), 139 __SSAT(((q15_t) in3 << shiftBits), 8), 140 __SSAT(((q15_t) in4 << shiftBits), 8) )); 141 #else 142 *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8); 143 *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8); 144 *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8); 145 *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8); 146 #endif 147 148 /* Decrement loop counter */ 149 blkCnt--; 150 } 151 } 152 else 153 { 154 while (blkCnt > 0U) 155 { 156 /* C = A >> shiftBits */ 157 158 #if defined (ARM_MATH_DSP) 159 /* Read 4 inputs */ 160 in1 = *pSrc++; 161 in2 = *pSrc++; 162 in3 = *pSrc++; 163 in4 = *pSrc++; 164 165 /* Pack and store result in destination buffer (in single write) */ 166 write_q7x4_ia (&pDst, __PACKq7((in1 >> -shiftBits), 167 (in2 >> -shiftBits), 168 (in3 >> -shiftBits), 169 (in4 >> -shiftBits) )); 170 #else 171 *pDst++ = (*pSrc++ >> -shiftBits); 172 *pDst++ = (*pSrc++ >> -shiftBits); 173 *pDst++ = (*pSrc++ >> -shiftBits); 174 *pDst++ = (*pSrc++ >> -shiftBits); 175 #endif 176 177 /* Decrement loop counter */ 178 blkCnt--; 179 } 180 } 181 182 /* Loop unrolling: Compute remaining outputs */ 183 blkCnt = blockSize % 0x4U; 184 185 #else 186 187 /* Initialize blkCnt with number of samples */ 188 blkCnt = blockSize; 189 190 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 191 192 /* If the shift value is positive then do right shift else left shift */ 193 if (sign == 0U) 194 { 195 while (blkCnt > 0U) 196 { 197 /* C = A << shiftBits */ 198 199 /* Shift input and store result in destination buffer. */ 200 *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8); 201 202 /* Decrement loop counter */ 203 blkCnt--; 204 } 205 } 206 else 207 { 208 while (blkCnt > 0U) 209 { 210 /* C = A >> shiftBits */ 211 212 /* Shift input and store result in destination buffer. */ 213 *pDst++ = (*pSrc++ >> -shiftBits); 214 215 /* Decrement loop counter */ 216 blkCnt--; 217 } 218 } 219 220 } 221 #endif /* defined(ARM_MATH_MVEI) */ 222 223 /** 224 @} end of BasicShift group 225 */