/ Drivers / CMSIS / DSP / Source / DistanceFunctions / arm_boolean_distance_template.h
arm_boolean_distance_template.h
  1  
  2  /* ----------------------------------------------------------------------
  3   * Project:      CMSIS DSP Library
  4   * Title:        arm_boolean_distance.c
  5   * Description:  Templates for boolean distances
  6   *
  7   * $Date:        23 April 2021
  8   * $Revision:    V1.9.0
  9   *
 10   * Target Processor: Cortex-M and Cortex-A cores
 11   * -------------------------------------------------------------------- */
 12  /*
 13   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 14   *
 15   * SPDX-License-Identifier: Apache-2.0
 16   *
 17   * Licensed under the Apache License, Version 2.0 (the License); you may
 18   * not use this file except in compliance with the License.
 19   * You may obtain a copy of the License at
 20   *
 21   * www.apache.org/licenses/LICENSE-2.0
 22   *
 23   * Unless required by applicable law or agreed to in writing, software
 24   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 25   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 26   * See the License for the specific language governing permissions and
 27   * limitations under the License.
 28   */
 29  
 30  
 31  
 32  
 33  /**
 34   * @defgroup DISTANCEF Distance Functions
 35   *
 36   * Computes Distances between vectors. 
 37   *
 38   * Distance functions are useful in a lot of algorithms.
 39   *
 40   */
 41  
 42  
 43  /**
 44   * @addtogroup DISTANCEF
 45   * @{
 46   */
 47  
 48  
 49  
 50  
 51  #define _FUNC(A,B) A##B 
 52  
 53  #define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
 54  
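/**
 * @brief        Elements of boolean distances
 *
 * Counts the categories of bit pairs (true/true, false/false, true/false,
 * false/true) which are used to compute boolean distances. Only the
 * counters whose selector macro (TT, FF, TF, FT) is defined when this
 * template is included are part of the generated function.
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @param[out]   cTT             Number of positions set in both pA and pB (only when TT is defined)
 * @param[out]   cFF             Number of positions clear in both pA and pB (only when FF is defined)
 * @param[out]   cTF             Number of positions set in pA and clear in pB (only when TF is defined)
 * @param[out]   cFT             Number of positions clear in pA and set in pB (only when FT is defined)
 * @return None
 *
 */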
 66  
 67  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 68  
 69  #include "arm_common_tables.h"
 70  
 71  void FUNC(EXT)(const uint32_t *pA
 72         , const uint32_t *pB
 73         , uint32_t numberOfBools
 74  #ifdef TT
 75         , uint32_t *cTT
 76  #endif
 77  #ifdef FF
 78         , uint32_t *cFF
 79  #endif
 80  #ifdef TF
 81         , uint32_t *cTF
 82  #endif
 83  #ifdef FT
 84         , uint32_t *cFT
 85  #endif
 86         )
 87  {
 88  
 89  #ifdef TT
 90      uint32_t _ctt=0;
 91  #endif
 92  #ifdef FF
 93      uint32_t _cff=0;
 94  #endif
 95  #ifdef TF
 96      uint32_t _ctf=0;
 97  #endif
 98  #ifdef FT
 99      uint32_t _cft=0;
100  #endif
101      uint32_t        a, b, ba, bb;
102      int shift;
103      const uint8_t  *pA8 = (const uint8_t *) pA;
104      const uint8_t  *pB8 = (const uint8_t *) pB;
105  
106      /* handle vector blocks */
107      uint32_t         blkCnt = numberOfBools / 128;
108  
109  
110  
111      while (blkCnt > 0U) {
112          uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
113          uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
114  
115  #ifdef TT
116          uint8x16_t      vecTT = vecA & vecB;
117          vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
118          _ctt += vaddvq(vecTT);
119  #endif
120  #ifdef FF
121          uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
122          vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
123          _cff += vaddvq(vecFF);
124  #endif
125  #ifdef TF
126          uint8x16_t      vecTF = vecA & vmvnq(vecB);
127          vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
128          _ctf += vaddvq(vecTF);
129  #endif
130  #ifdef FT
131          uint8x16_t      vecFT = vmvnq(vecA) & vecB;
132          vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
133          _cft += vaddvq(vecFT);
134  #endif
135  
136          pA8 += 16;
137          pB8 += 16;
138          blkCnt--;
139  
140      }
141  
142      pA = (const uint32_t *)pA8;
143      pB = (const uint32_t *)pB8;
144  
145      blkCnt = numberOfBools & 0x7F;
146      while(blkCnt >= 32)
147      {
148         a = *pA++;
149         b = *pB++;
150         shift = 0;
151         while(shift < 32)
152         {
153            ba = a & 1;
154            bb = b & 1;
155            a = a >> 1;
156            b = b >> 1;
157  
158  #ifdef TT
159            _ctt += (ba && bb);
160  #endif
161  #ifdef FF
162            _cff += ((1 ^ ba) && (1 ^ bb));
163  #endif
164  #ifdef TF
165            _ctf += (ba && (1 ^ bb));
166  #endif
167  #ifdef FT
168            _cft += ((1 ^ ba) && bb);
169  #endif
170            shift ++;
171         }
172  
173         blkCnt -= 32;
174      }
175  
176      a = *pA++;
177      b = *pB++;
178  
179      a = a >> (32 - blkCnt);
180      b = b >> (32 - blkCnt);
181  
182      while(blkCnt > 0)
183      {
184            ba = a & 1;
185            bb = b & 1;
186            a = a >> 1;
187  
188            b = b >> 1;
189  #ifdef TT
190            _ctt += (ba && bb);
191  #endif
192  #ifdef FF
193            _cff += ((1 ^ ba) && (1 ^ bb));
194  #endif
195  #ifdef TF
196            _ctf += (ba && (1 ^ bb));
197  #endif
198  #ifdef FT
199            _cft += ((1 ^ ba) && bb);
200  #endif
201            blkCnt --;
202      }
203  
204  #ifdef TT
205      *cTT = _ctt;
206  #endif
207  #ifdef FF
208      *cFF = _cff;
209  #endif
210  #ifdef TF
211      *cTF = _ctf;
212  #endif
213  #ifdef FT
214      *cFT = _cft;
215  #endif
216  }
217  
218  #else
219  #if defined(ARM_MATH_NEON)
220  
221  
222  void FUNC(EXT)(const uint32_t *pA
223         , const uint32_t *pB
224         , uint32_t numberOfBools
225  #ifdef TT
226         , uint32_t *cTT
227  #endif
228  #ifdef FF
229         , uint32_t *cFF
230  #endif
231  #ifdef TF
232         , uint32_t *cTF
233  #endif
234  #ifdef FT
235         , uint32_t *cFT
236  #endif
237         )
238  {
239  #ifdef TT
240      uint32_t _ctt=0;
241  #endif
242  #ifdef FF
243      uint32_t _cff=0;
244  #endif
245  #ifdef TF
246      uint32_t _ctf=0;
247  #endif
248  #ifdef FT
249      uint32_t _cft=0;
250  #endif
251      uint32_t nbBoolBlock;
252      uint32_t a,b,ba,bb;
253      int shift;
254      uint32x4_t aV, bV;
255  #ifdef TT
256      uint32x4_t cttV;
257  #endif
258  #ifdef FF
259      uint32x4_t cffV;
260  #endif
261  #ifdef TF
262      uint32x4_t ctfV;
263  #endif
264  #ifdef FT
265      uint32x4_t cftV;
266  #endif
267      uint8x16_t tmp;
268      uint16x8_t tmp2;
269      uint32x4_t tmp3;
270      uint64x2_t tmp4;
271  #ifdef TT
272      uint64x2_t tmp4tt;
273  #endif
274  #ifdef FF
275      uint64x2_t tmp4ff;
276  #endif
277  #ifdef TF
278      uint64x2_t tmp4tf;
279  #endif
280  #ifdef FT
281      uint64x2_t tmp4ft;
282  #endif
283  
284  #ifdef TT
285      tmp4tt = vdupq_n_u64(0);
286  #endif
287  #ifdef FF
288      tmp4ff = vdupq_n_u64(0);
289  #endif
290  #ifdef TF
291      tmp4tf = vdupq_n_u64(0);
292  #endif
293  #ifdef FT
294      tmp4ft = vdupq_n_u64(0);
295  #endif
296  
297      nbBoolBlock = numberOfBools >> 7;
298      while(nbBoolBlock > 0)
299      {
300         aV = vld1q_u32(pA);
301         bV = vld1q_u32(pB);
302         pA += 4;
303         pB += 4;
304  
305  #ifdef TT
306         cttV = vandq_u32(aV,bV);
307  #endif
308  #ifdef FF
309         cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
310  #endif
311  #ifdef TF
312         ctfV = vandq_u32(aV,vmvnq_u32(bV));
313  #endif
314  #ifdef FT
315         cftV = vandq_u32(vmvnq_u32(aV),bV);
316  #endif
317  
318  #ifdef TT
319         tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
320         tmp2 = vpaddlq_u8(tmp);
321         tmp3 = vpaddlq_u16(tmp2);
322         tmp4 = vpaddlq_u32(tmp3);
323         tmp4tt = vaddq_u64(tmp4tt, tmp4);
324  #endif
325  
326  #ifdef FF
327         tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
328         tmp2 = vpaddlq_u8(tmp);
329         tmp3 = vpaddlq_u16(tmp2);
330         tmp4 = vpaddlq_u32(tmp3);
331         tmp4ff = vaddq_u64(tmp4ff, tmp4);
332  #endif
333  
334  #ifdef TF
335         tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
336         tmp2 = vpaddlq_u8(tmp);
337         tmp3 = vpaddlq_u16(tmp2);
338         tmp4 = vpaddlq_u32(tmp3);
339         tmp4tf = vaddq_u64(tmp4tf, tmp4);
340  #endif 
341  
342  #ifdef FT
343         tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
344         tmp2 = vpaddlq_u8(tmp);
345         tmp3 = vpaddlq_u16(tmp2);
346         tmp4 = vpaddlq_u32(tmp3);
347         tmp4ft = vaddq_u64(tmp4ft, tmp4);
348  #endif
349  
350  
351         nbBoolBlock --;
352      }
353  
354  #ifdef TT
355      _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
356  #endif
357  #ifdef FF
358      _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
359  #endif
360  #ifdef TF
361      _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
362  #endif
363  #ifdef FT
364      _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
365  #endif
366  
367      nbBoolBlock = numberOfBools & 0x7F;
368      while(nbBoolBlock >= 32)
369      {
370         a = *pA++;
371         b = *pB++;
372         shift = 0;
373         while(shift < 32)
374         {
375            ba = a & 1;
376            bb = b & 1;
377            a = a >> 1;
378            b = b >> 1;
379  
380  #ifdef TT
381            _ctt += (ba && bb);
382  #endif
383  #ifdef FF
384            _cff += ((1 ^ ba) && (1 ^ bb));
385  #endif
386  #ifdef TF
387            _ctf += (ba && (1 ^ bb));
388  #endif
389  #ifdef FT
390            _cft += ((1 ^ ba) && bb);
391  #endif
392            shift ++;
393         }
394  
395         nbBoolBlock -= 32;
396      }
397  
398      a = *pA++;
399      b = *pB++;
400  
401      a = a >> (32 - nbBoolBlock);
402      b = b >> (32 - nbBoolBlock);
403  
404      while(nbBoolBlock > 0)
405      {
406            ba = a & 1;
407            bb = b & 1;
408            a = a >> 1;
409  
410            b = b >> 1;
411  #ifdef TT
412            _ctt += (ba && bb);
413  #endif
414  #ifdef FF
415            _cff += ((1 ^ ba) && (1 ^ bb));
416  #endif
417  #ifdef TF
418            _ctf += (ba && (1 ^ bb));
419  #endif
420  #ifdef FT
421            _cft += ((1 ^ ba) && bb);
422  #endif
423            nbBoolBlock --;
424      }
425  
426  #ifdef TT
427      *cTT = _ctt;
428  #endif
429  #ifdef FF
430      *cFF = _cff;
431  #endif
432  #ifdef TF
433      *cTF = _ctf;
434  #endif
435  #ifdef FT
436      *cFT = _cft;
437  #endif
438  }
439  
440  #else
441  
442  void FUNC(EXT)(const uint32_t *pA
443         , const uint32_t *pB
444         , uint32_t numberOfBools
445  #ifdef TT
446         , uint32_t *cTT
447  #endif
448  #ifdef FF
449         , uint32_t *cFF
450  #endif
451  #ifdef TF
452         , uint32_t *cTF
453  #endif
454  #ifdef FT
455         , uint32_t *cFT
456  #endif
457         )
458  {
459    
460  #ifdef TT
461      uint32_t _ctt=0;
462  #endif
463  #ifdef FF
464      uint32_t _cff=0;
465  #endif
466  #ifdef TF
467      uint32_t _ctf=0;
468  #endif
469  #ifdef FT
470      uint32_t _cft=0;
471  #endif
472      uint32_t a,b,ba,bb;
473      int shift;
474  
475      while(numberOfBools >= 32)
476      {
477         a = *pA++;
478         b = *pB++;
479         shift = 0;
480         while(shift < 32)
481         {
482            ba = a & 1;
483            bb = b & 1;
484            a = a >> 1;
485            b = b >> 1;
486  #ifdef TT
487            _ctt += (ba && bb);
488  #endif
489  #ifdef FF
490            _cff += ((1 ^ ba) && (1 ^ bb));
491  #endif
492  #ifdef TF
493            _ctf += (ba && (1 ^ bb));
494  #endif
495  #ifdef FT
496            _cft += ((1 ^ ba) && bb);
497  #endif
498            shift ++;
499         }
500  
501         numberOfBools -= 32;
502      }
503  
504      a = *pA++;
505      b = *pB++;
506  
507      a = a >> (32 - numberOfBools);
508      b = b >> (32 - numberOfBools);
509  
510      while(numberOfBools > 0)
511      {
512            ba = a & 1;
513            bb = b & 1;
514            a = a >> 1;
515            b = b >> 1;
516  
517  #ifdef TT
518            _ctt += (ba && bb);
519  #endif
520  #ifdef FF
521            _cff += ((1 ^ ba) && (1 ^ bb));
522  #endif
523  #ifdef TF
524            _ctf += (ba && (1 ^ bb));
525  #endif
526  #ifdef FT
527            _cft += ((1 ^ ba) && bb);
528  #endif
529            numberOfBools --;
530      }
531  
532  #ifdef TT
533      *cTT = _ctt;
534  #endif
535  #ifdef FF
536      *cFF = _cff;
537  #endif
538  #ifdef TF 
539      *cTF = _ctf;
540  #endif
541  #ifdef FT
542      *cFT = _cft;
543  #endif
544  }
545  #endif
546  #endif /* defined(ARM_MATH_MVEI) */
547  
548  
549  /**
550   * @} end of DISTANCEF group
551   */