/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_clip_f16.c
arm_clip_f16.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_clip_f16.c
  4   * Description:  Floating-point vector clipping
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions_f16.h"
 30  
 31  /**
 32    @ingroup groupMath
 33   */
 34  
 35  
 36  /**
 37    @addtogroup BasicClip
 38    @{
 39   */
 40  
 41  /**
 42    @brief         Elementwise floating-point clipping
 43    @param[in]     pSrc          points to input values
 44    @param[out]    pDst          points to output clipped values
 45    @param[in]     low           lower bound
 46    @param[in]     high          higher bound
 47    @param[in]     numSamples    number of samples to clip
 48    @return        none
 49   */
 50  
 51  #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 52  
 53  #include "arm_helium_utils.h"
 54  
 55  void arm_clip_f16(const float16_t * pSrc, 
 56    float16_t * pDst, 
 57    float16_t low, 
 58    float16_t high, 
 59    uint32_t numSamples)
 60  {
 61      uint32_t  blkCnt;
 62      f16x8_t curVec0, curVec1;
 63      f16x8_t vecLow, vecHigh;
 64  
 65      vecLow = vdupq_n_f16(low);
 66      vecHigh = vdupq_n_f16(high);
 67  
 68      curVec0 = vld1q(pSrc);
 69      pSrc += 8;
 70      /*
 71       * unrolled x 2 to allow
 72       * vldr/vstr/vmin/vmax
 73       * stall free interleaving
 74       */
 75      blkCnt = numSamples >> 4;
 76      while (blkCnt--)
 77      {
 78          curVec0 = vmaxnmq(curVec0, vecLow);
 79          curVec1 = vld1q(pSrc);
 80          pSrc += 8;
 81          curVec0 = vminnmq(curVec0, vecHigh);
 82          vst1q(pDst, curVec0);
 83          pDst += 8;
 84          curVec1 = vmaxnmq(curVec1, vecLow);
 85          curVec0 = vld1q(pSrc);
 86          pSrc += 8;
 87          curVec1 = vminnmq(curVec1, vecHigh);
 88          vst1q(pDst, curVec1);
 89          pDst += 8;
 90      }
 91      /*
 92       * Tail handling
 93       */
 94      blkCnt = numSamples - ((numSamples >> 4) << 4);
 95      if (blkCnt >= 8)
 96      {
 97          curVec0 = vmaxnmq(curVec0, vecLow);
 98          curVec0 = vminnmq(curVec0, vecHigh);
 99          vst1q(pDst, curVec0);
100          pDst += 8;
101          curVec0 = vld1q(pSrc);
102          pSrc += 8;
103      }
104  
105      if (blkCnt > 0)
106      {
107          mve_pred16_t p0 = vctp16q(blkCnt & 7);
108          curVec0 = vmaxnmq(curVec0, vecLow);
109          curVec0 = vminnmq(curVec0, vecHigh);
110          vstrhq_p(pDst, curVec0, p0);
111      }
112  }
113  
114  #else
115  
116  #if defined(ARM_FLOAT16_SUPPORTED)
117  
118  void arm_clip_f16(const float16_t * pSrc, 
119    float16_t * pDst, 
120    float16_t low, 
121    float16_t high, 
122    uint32_t numSamples)
123  {
124      for (uint32_t i = 0; i < numSamples; i++)
125      {                                        
126          if ((_Float16)pSrc[i] > (_Float16)high)                  
127              pDst[i] = high;                  
128          else if ((_Float16)pSrc[i] < (_Float16)low)              
129              pDst[i] = low;                   
130          else                                 
131              pDst[i] = pSrc[i];               
132      }
133  }
134  #endif /* defined(ARM_FLOAT16_SUPPORTED) */
135  
136  #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
137  
138  
139  /**
140    @} end of BasicClip group
141   */