/ Drivers / CMSIS / DSP / Source / BasicMathFunctions / arm_clip_f32.c
arm_clip_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_clip_f32.c
  4   * Description:  Floating-point elementwise clipping
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/basic_math_functions.h"
 30  
 31  /**
 32    @ingroup groupMath
 33   */
 34  
 35  /**
 36    @defgroup BasicClip Elementwise clipping
 37  
 38    Element-by-element clipping of a value.
 39  
 40    The value is constrained between 2 bounds.
 41  
 42    There are separate functions for floating-point, Q7, Q15, and Q31 data types.
 43   */
 44  
 45  /**
 46    @addtogroup BasicClip
 47    @{
 48   */
 49  
 50  /**
 51    @brief         Elementwise floating-point clipping
 52    @param[in]     pSrc          points to input values
 53    @param[out]    pDst          points to output clipped values
 54    @param[in]     low           lower bound
 55    @param[in]     high          higher bound
 56    @param[in]     numSamples    number of samples to clip
 57    @return        none
 58   */
 59  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 60  
 61  #include "arm_helium_utils.h"
 62  
 63  void arm_clip_f32(const float32_t * pSrc, 
 64    float32_t * pDst, 
 65    float32_t low, 
 66    float32_t high, 
 67    uint32_t numSamples)
 68  {
 69      uint32_t  blkCnt;
 70      f32x4_t curVec0, curVec1;
 71      f32x4_t vecLow, vecHigh;
 72  
 73      vecLow = vdupq_n_f32(low);
 74      vecHigh = vdupq_n_f32(high);
 75  
 76      curVec0 = vld1q(pSrc);
 77      pSrc += 4;
 78      /*
 79       * unrolled x 2 to allow
 80       * vldr/vstr/vmin/vmax
 81       * stall free interleaving
 82       */
 83      blkCnt = numSamples >> 3;
 84      while (blkCnt--)
 85      {
 86          curVec0 = vmaxnmq(curVec0, vecLow);
 87          curVec1 = vld1q(pSrc);
 88          pSrc += 4;
 89          curVec0 = vminnmq(curVec0, vecHigh);
 90          vst1q(pDst, curVec0);
 91          pDst += 4;
 92          curVec1 = vmaxnmq(curVec1, vecLow);
 93          curVec0 = vld1q(pSrc);
 94          pSrc += 4;
 95          curVec1 = vminnmq(curVec1, vecHigh);
 96          vst1q(pDst, curVec1);
 97          pDst += 4;
 98      }
 99      /*
100       * Tail handling
101       */
102      blkCnt = numSamples - ((numSamples >> 3) << 3);
103      if (blkCnt >= 4)
104      {
105          curVec0 = vmaxnmq(curVec0, vecLow);
106          curVec0 = vminnmq(curVec0, vecHigh);
107          vst1q(pDst, curVec0);
108          pDst += 4;
109          curVec0 = vld1q(pSrc);
110          pSrc += 4;
111      }
112  
113      if (blkCnt > 0)
114      {
115          mve_pred16_t p0 = vctp32q(blkCnt & 3);
116          curVec0 = vmaxnmq(curVec0, vecLow);
117          curVec0 = vminnmq(curVec0, vecHigh);
118          vstrwq_p(pDst, curVec0, p0);
119      }
120  }
121  
122  #else
/*
 * Portable scalar implementation of elementwise clipping.
 *
 * Walks the input once, writing to pDst either the sample itself or the
 * bound it exceeded. Nothing is read or written when numSamples is 0.
 */
void arm_clip_f32(const float32_t * pSrc,
  float32_t * pDst,
  float32_t low,
  float32_t high,
  uint32_t numSamples)
{
    const float32_t *pIn = pSrc;
    float32_t *pOut = pDst;
    uint32_t remaining = numSamples;

    while (remaining > 0U)
    {
        /* Read the sample before writing so in-place use (pDst == pSrc) works. */
        const float32_t sample = *pIn++;

        /* The upper bound is checked before the lower one. A NaN sample fails
           both comparisons and is therefore copied through unchanged. */
        *pOut++ = (sample > high) ? high
                : (sample < low)  ? low
                : sample;

        remaining--;
    }
}
140  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
141  
142  /**
143    @} end of BasicClip group
144   */