arm_chebyshev_distance_f16.c
1 2 /* ---------------------------------------------------------------------- 3 * Project: CMSIS DSP Library 4 * Title: arm_chebyshev_distance_f16.c 5 * Description: Chebyshev distance between two vectors 6 * 7 * $Date: 23 April 2021 8 * $Revision: V1.9.0 9 * 10 * Target Processor: Cortex-M and Cortex-A cores 11 * -------------------------------------------------------------------- */ 12 /* 13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 14 * 15 * SPDX-License-Identifier: Apache-2.0 16 * 17 * Licensed under the Apache License, Version 2.0 (the License); you may 18 * not use this file except in compliance with the License. 19 * You may obtain a copy of the License at 20 * 21 * www.apache.org/licenses/LICENSE-2.0 22 * 23 * Unless required by applicable law or agreed to in writing, software 24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 26 * See the License for the specific language governing permissions and 27 * limitations under the License. 28 */ 29 30 #include "dsp/distance_functions_f16.h" 31 32 #if defined(ARM_FLOAT16_SUPPORTED) 33 34 #include <limits.h> 35 #include <math.h> 36 37 /** 38 @ingroup FloatDist 39 */ 40 41 /** 42 @defgroup Chebyshev Chebyshev distance 43 44 Chebyshev distance 45 */ 46 47 /** 48 @addtogroup Chebyshev 49 @{ 50 */ 51 52 53 /** 54 * @brief Chebyshev distance between two vectors 55 * @param[in] pA First vector 56 * @param[in] pB Second vector 57 * @param[in] blockSize vector length 58 * @return distance 59 * 60 */ 61 62 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 63 64 #include "arm_helium_utils.h" 65 #include "arm_vec_math.h" 66 67 float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) 68 { 69 uint32_t blkCnt; /* loop counters */ 70 f16x8_t vecA, vecB; 71 f16x8_t vecDiff = vdupq_n_f16(0.0); 72 float16_t maxValue = 0.0f16; 73 74 75 blkCnt = blockSize >> 3; 76 while (blkCnt > 0U) { 77 vecA = vld1q(pA); 78 pA += 8; 79 vecB = vld1q(pB); 80 pB += 8; 81 /* 82 * update per-lane max. 83 */ 84 vecDiff = vmaxnmaq(vsubq(vecA, vecB), vecDiff); 85 /* 86 * Decrement the blockSize loop counter 87 */ 88 blkCnt--; 89 } 90 /* 91 * tail 92 * (will be merged thru tail predication) 93 */ 94 blkCnt = blockSize & 7; 95 if (blkCnt > 0U) { 96 mve_pred16_t p0 = vctp16q(blkCnt); 97 98 vecA = vldrhq_z_f16(pA, p0); 99 vecB = vldrhq_z_f16(pB, p0); 100 101 /* 102 * Get current max per lane and current index per lane 103 * when a max is selected 104 */ 105 vecDiff = vmaxnmaq_m(vecDiff, vsubq(vecA, vecB), p0); 106 } 107 /* 108 * Get max value across the vector 109 */ 110 return vmaxnmavq(maxValue, vecDiff); 111 } 112 113 #else 114 float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) 115 { 116 _Float16 diff=0.0f, maxVal,tmpA, tmpB; 117 118 tmpA = *pA++; 119 tmpB = *pB++; 120 diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB)); 121 maxVal = diff; 122 blockSize--; 123 124 while(blockSize > 0) 125 { 126 tmpA = *pA++; 127 tmpB = *pB++; 128 diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB)); 129 if ((_Float16)diff > (_Float16)maxVal) 130 { 131 maxVal = diff; 132 } 133 blockSize --; 134 } 135 136 return(maxVal); 137 } 138 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 139 140 141 /** 142 * @} end of Chebyshev group 143 */ 144 145 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 146