arm_clip_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_clip_f16.c 4 * Description: Floating-point vector addition 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions_f16.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 36 /** 37 @addtogroup BasicClip 38 @{ 39 */ 40 41 /** 42 @brief Elementwise floating-point clipping 43 @param[in] pSrc points to input values 44 @param[out] pDst points to output clipped values 45 @param[in] low lower bound 46 @param[in] high higher bound 47 @param[in] numSamples number of samples to clip 48 @return none 49 */ 50 51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 52 53 #include "arm_helium_utils.h" 54 55 void arm_clip_f16(const float16_t * pSrc, 56 float16_t * pDst, 57 float16_t low, 58 float16_t high, 59 uint32_t numSamples) 60 { 61 uint32_t blkCnt; 62 f16x8_t curVec0, curVec1; 63 f16x8_t vecLow, vecHigh; 64 65 vecLow = vdupq_n_f16(low); 66 vecHigh = vdupq_n_f16(high); 67 68 curVec0 = vld1q(pSrc); 69 pSrc += 8; 70 /* 71 * unrolled x 2 to allow 72 * vldr/vstr/vmin/vmax 73 * stall free interleaving 74 */ 75 blkCnt = numSamples >> 4; 76 while (blkCnt--) 77 { 78 curVec0 = vmaxnmq(curVec0, vecLow); 79 curVec1 = vld1q(pSrc); 80 pSrc += 8; 81 curVec0 = vminnmq(curVec0, vecHigh); 82 vst1q(pDst, curVec0); 83 pDst += 8; 84 curVec1 = vmaxnmq(curVec1, vecLow); 85 curVec0 = vld1q(pSrc); 86 pSrc += 8; 87 curVec1 = vminnmq(curVec1, vecHigh); 88 vst1q(pDst, curVec1); 89 pDst += 8; 90 } 91 /* 92 * Tail handling 93 */ 94 blkCnt = numSamples - ((numSamples >> 4) << 4); 95 if (blkCnt >= 8) 96 { 97 curVec0 = vmaxnmq(curVec0, vecLow); 98 curVec0 = vminnmq(curVec0, vecHigh); 99 vst1q(pDst, curVec0); 100 pDst += 8; 101 curVec0 = vld1q(pSrc); 102 pSrc += 8; 103 } 104 105 if (blkCnt > 0) 106 { 107 mve_pred16_t p0 = vctp16q(blkCnt & 7); 108 curVec0 = vmaxnmq(curVec0, vecLow); 109 curVec0 = vminnmq(curVec0, vecHigh); 110 vstrhq_p(pDst, curVec0, p0); 111 } 112 } 113 114 #else 115 116 #if defined(ARM_FLOAT16_SUPPORTED) 117 118 void arm_clip_f16(const float16_t * pSrc, 119 float16_t * pDst, 120 float16_t low, 121 float16_t high, 122 uint32_t numSamples) 123 { 124 for (uint32_t i = 0; i < numSamples; i++) 125 { 126 if ((_Float16)pSrc[i] > (_Float16)high) 127 pDst[i] = high; 128 else if ((_Float16)pSrc[i] < (_Float16)low) 129 pDst[i] = low; 130 else 131 pDst[i] = pSrc[i]; 132 } 133 } 134 #endif /* defined(ARM_FLOAT16_SUPPORTED */ 135 136 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 137 138 139 /** 140 @} end of BasicClip group 141 */