arm_boolean_distance_template.h

/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_boolean_distance.c
 * Description:  Templates for boolean distances
 *
 * $Date:        23 April 2021
 * $Revision:    V1.9.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */




/**
 * @defgroup DISTANCEF Distance Functions
 *
 * Computes Distances between vectors.
 *
 * Distance functions are useful in a lot of algorithms.
 *
 */


/**
 * @addtogroup DISTANCEF
 * @{
 */




#define _FUNC(A,B) A##B

#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)

/**
 * @brief        Elements of boolean distances
 *
 * Different values which are used to compute boolean distances.
 *
 * This file is a template: the including file selects, through the
 * preprocessor symbols TT, FF, TF and FT, which counters are computed and
 * therefore which output parameters the generated function has. The name of
 * the generated function is arm_boolean_distance followed by the expansion
 * of EXT.
 *
 * Complete 32-bit words are counted bit by bit. The trailing
 * (numberOfBools modulo 32) booleans are taken from the most significant
 * bits of the word that follows the last complete word. That trailing word
 * is only read when at least one boolean remains, so the input buffers never
 * need to be larger than the number of words required to hold
 * numberOfBools bits.
 *
 * @param[in]    pA              First vector of packed booleans
 * @param[in]    pB              Second vector of packed booleans
 * @param[in]    numberOfBools   Number of booleans
 * @param[out]   cTT             (only when TT is defined) number of positions where both A and B are true
 * @param[out]   cFF             (only when FF is defined) number of positions where both A and B are false
 * @param[out]   cTF             (only when TF is defined) number of positions where A is true and B is false
 * @param[out]   cFT             (only when FT is defined) number of positions where A is false and B is true
 * @return None
 *
 */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "arm_common_tables.h"

void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{

#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t        a, b, ba, bb;
    int shift;
    const uint8_t  *pA8 = (const uint8_t *) pA;
    const uint8_t  *pB8 = (const uint8_t *) pB;

    /* handle vector blocks: 16 bytes, i.e. 128 booleans, per iteration */
    uint32_t         blkCnt = numberOfBools / 128;



    while (blkCnt > 0U) {
        uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
        uint8x16_t      vecB = vld1q((const uint8_t *) pB8);

        /*
         * Each byte of the combined mask is used as an offset into hwLUT and
         * the looked-up bytes are summed across the vector.
         * NOTE(review): hwLUT comes from arm_common_tables.h and is presumably
         * a per-byte bit-count table - confirm against that header.
         */
#ifdef TT
        uint8x16_t      vecTT = vecA & vecB;
        vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
        _ctt += vaddvq(vecTT);
#endif
#ifdef FF
        uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
        vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
        _cff += vaddvq(vecFF);
#endif
#ifdef TF
        uint8x16_t      vecTF = vecA & vmvnq(vecB);
        vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
        _ctf += vaddvq(vecTF);
#endif
#ifdef FT
        uint8x16_t      vecFT = vmvnq(vecA) & vecB;
        vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
        _cft += vaddvq(vecFT);
#endif

        pA8 += 16;
        pB8 += 16;
        blkCnt--;

    }

    pA = (const uint32_t *)pA8;
    pB = (const uint32_t *)pB8;

    /* Booleans left over after the 128-bit blocks: complete words first */
    blkCnt = numberOfBools & 0x7F;
    while(blkCnt >= 32)
    {
       a = *pA++;
       b = *pB++;
       shift = 0;
       while(shift < 32)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;
          b = b >> 1;

#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          shift ++;
       }

       blkCnt -= 32;
    }

    /*
     * Partial trailing word. It must only be accessed when booleans remain:
     * an unconditional access would read past the end of the buffers when
     * numberOfBools is a multiple of 32, and shifting a 32-bit value by 32
     * is undefined behaviour.
     */
    if (blkCnt > 0U)
    {
       a = *pA;
       b = *pB;

       /* The remaining booleans are stored in the most significant bits */
       a = a >> (32 - blkCnt);
       b = b >> (32 - blkCnt);

       while(blkCnt > 0)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;

          b = b >> 1;
#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          blkCnt --;
       }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}

#else
#if defined(ARM_MATH_NEON)


void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{
#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t nbBoolBlock;
    uint32_t a,b,ba,bb;
    int shift;
    uint32x4_t aV, bV;
#ifdef TT
    uint32x4_t cttV;
#endif
#ifdef FF
    uint32x4_t cffV;
#endif
#ifdef TF
    uint32x4_t ctfV;
#endif
#ifdef FT
    uint32x4_t cftV;
#endif
    uint8x16_t tmp;
    uint16x8_t tmp2;
    uint32x4_t tmp3;
    uint64x2_t tmp4;
#ifdef TT
    uint64x2_t tmp4tt;
#endif
#ifdef FF
    uint64x2_t tmp4ff;
#endif
#ifdef TF
    uint64x2_t tmp4tf;
#endif
#ifdef FT
    uint64x2_t tmp4ft;
#endif

    /* 64-bit lane accumulators for the vectorised part */
#ifdef TT
    tmp4tt = vdupq_n_u64(0);
#endif
#ifdef FF
    tmp4ff = vdupq_n_u64(0);
#endif
#ifdef TF
    tmp4tf = vdupq_n_u64(0);
#endif
#ifdef FT
    tmp4ft = vdupq_n_u64(0);
#endif

    /* Vector blocks: four words, i.e. 128 booleans, per iteration */
    nbBoolBlock = numberOfBools >> 7;
    while(nbBoolBlock > 0)
    {
       aV = vld1q_u32(pA);
       bV = vld1q_u32(pB);
       pA += 4;
       pB += 4;

#ifdef TT
       cttV = vandq_u32(aV,bV);
#endif
#ifdef FF
       cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
#endif
#ifdef TF
       ctfV = vandq_u32(aV,vmvnq_u32(bV));
#endif
#ifdef FT
       cftV = vandq_u32(vmvnq_u32(aV),bV);
#endif

       /*
        * For each mask: count the set bits of every byte, then widen with
        * pairwise additions (8 -> 16 -> 32 -> 64 bits) before accumulating.
        */
#ifdef TT
       tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
       tmp2 = vpaddlq_u8(tmp);
       tmp3 = vpaddlq_u16(tmp2);
       tmp4 = vpaddlq_u32(tmp3);
       tmp4tt = vaddq_u64(tmp4tt, tmp4);
#endif

#ifdef FF
       tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
       tmp2 = vpaddlq_u8(tmp);
       tmp3 = vpaddlq_u16(tmp2);
       tmp4 = vpaddlq_u32(tmp3);
       tmp4ff = vaddq_u64(tmp4ff, tmp4);
#endif

#ifdef TF
       tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
       tmp2 = vpaddlq_u8(tmp);
       tmp3 = vpaddlq_u16(tmp2);
       tmp4 = vpaddlq_u32(tmp3);
       tmp4tf = vaddq_u64(tmp4tf, tmp4);
#endif

#ifdef FT
       tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
       tmp2 = vpaddlq_u8(tmp);
       tmp3 = vpaddlq_u16(tmp2);
       tmp4 = vpaddlq_u32(tmp3);
       tmp4ft = vaddq_u64(tmp4ft, tmp4);
#endif


       nbBoolBlock --;
    }

    /* Fold the two 64-bit lanes of each accumulator into the scalar counter */
#ifdef TT
    _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
#endif
#ifdef FF
    _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
#endif
#ifdef TF
    _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
#endif
#ifdef FT
    _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
#endif

    /* Booleans left over after the 128-bit blocks: complete words first */
    nbBoolBlock = numberOfBools & 0x7F;
    while(nbBoolBlock >= 32)
    {
       a = *pA++;
       b = *pB++;
       shift = 0;
       while(shift < 32)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;
          b = b >> 1;

#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          shift ++;
       }

       nbBoolBlock -= 32;
    }

    /*
     * Partial trailing word. It must only be accessed when booleans remain:
     * an unconditional access would read past the end of the buffers when
     * numberOfBools is a multiple of 32, and shifting a 32-bit value by 32
     * is undefined behaviour.
     */
    if (nbBoolBlock > 0U)
    {
       a = *pA;
       b = *pB;

       /* The remaining booleans are stored in the most significant bits */
       a = a >> (32 - nbBoolBlock);
       b = b >> (32 - nbBoolBlock);

       while(nbBoolBlock > 0)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;

          b = b >> 1;
#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          nbBoolBlock --;
       }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}

#else

void FUNC(EXT)(const uint32_t *pA
       , const uint32_t *pB
       , uint32_t numberOfBools
#ifdef TT
       , uint32_t *cTT
#endif
#ifdef FF
       , uint32_t *cFF
#endif
#ifdef TF
       , uint32_t *cTF
#endif
#ifdef FT
       , uint32_t *cFT
#endif
       )
{

#ifdef TT
    uint32_t _ctt=0;
#endif
#ifdef FF
    uint32_t _cff=0;
#endif
#ifdef TF
    uint32_t _ctf=0;
#endif
#ifdef FT
    uint32_t _cft=0;
#endif
    uint32_t a,b,ba,bb;
    int shift;

    /* Complete 32-bit words, examined one bit at a time */
    while(numberOfBools >= 32)
    {
       a = *pA++;
       b = *pB++;
       shift = 0;
       while(shift < 32)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;
          b = b >> 1;
#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          shift ++;
       }

       numberOfBools -= 32;
    }

    /*
     * Partial trailing word. It must only be accessed when booleans remain:
     * an unconditional access would read past the end of the buffers when
     * numberOfBools is a multiple of 32, and shifting a 32-bit value by 32
     * is undefined behaviour.
     */
    if (numberOfBools > 0U)
    {
       a = *pA;
       b = *pB;

       /* The remaining booleans are stored in the most significant bits */
       a = a >> (32 - numberOfBools);
       b = b >> (32 - numberOfBools);

       while(numberOfBools > 0)
       {
          ba = a & 1;
          bb = b & 1;
          a = a >> 1;
          b = b >> 1;

#ifdef TT
          _ctt += (ba && bb);
#endif
#ifdef FF
          _cff += ((1 ^ ba) && (1 ^ bb));
#endif
#ifdef TF
          _ctf += (ba && (1 ^ bb));
#endif
#ifdef FT
          _cft += ((1 ^ ba) && bb);
#endif
          numberOfBools --;
       }
    }

#ifdef TT
    *cTT = _ctt;
#endif
#ifdef FF
    *cFF = _cff;
#endif
#ifdef TF
    *cTF = _ctf;
#endif
#ifdef FT
    *cFT = _cft;
#endif
}
#endif
#endif /* defined(ARM_MATH_MVEI) */


/**
 * @} end of DISTANCEF group
 */