none.h
1 /****************************************************************************** 2 * @file none.h 3 * @brief Intrinsincs when no DSP extension available 4 * @version V1.9.0 5 * @date 20. July 2020 6 ******************************************************************************/ 7 /* 8 * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. 9 * 10 * SPDX-License-Identifier: Apache-2.0 11 * 12 * Licensed under the Apache License, Version 2.0 (the License); you may 13 * not use this file except in compliance with the License. 14 * You may obtain a copy of the License at 15 * 16 * www.apache.org/licenses/LICENSE-2.0 17 * 18 * Unless required by applicable law or agreed to in writing, software 19 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 20 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 * See the License for the specific language governing permissions and 22 * limitations under the License. 23 */ 24 25 /* 26 27 Definitions in this file are allowing to reuse some versions of the 28 CMSIS-DSP to build on a core (M0 for instance) or a host where 29 DSP extension are not available. 30 31 Ideally a pure C version should have been used instead. 32 But those are not always available or use a restricted set 33 of intrinsics. 34 35 */ 36 37 #ifndef _NONE_H_ 38 #define _NONE_H_ 39 40 #include "arm_math_types.h" 41 42 #ifdef __cplusplus 43 extern "C" 44 { 45 #endif 46 47 48 49 /* 50 51 Normally those kind of definitions are in a compiler file 52 in Core or Core_A. 53 54 But for MSVC compiler it is a bit special. The goal is very specific 55 to CMSIS-DSP and only to allow the use of this library from other 56 systems like Python or Matlab. 57 58 MSVC is not going to be used to cross-compile to ARM. So, having a MSVC 59 compiler file in Core or Core_A would not make sense. 60 61 */ 62 #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__) 63 __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) 64 { 65 if (data == 0U) { return 32U; } 66 67 uint32_t count = 0U; 68 uint32_t mask = 0x80000000U; 69 70 while ((data & mask) == 0U) 71 { 72 count += 1U; 73 mask = mask >> 1U; 74 } 75 return count; 76 } 77 78 __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) 79 { 80 if ((sat >= 1U) && (sat <= 32U)) 81 { 82 const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); 83 const int32_t min = -1 - max ; 84 if (val > max) 85 { 86 return max; 87 } 88 else if (val < min) 89 { 90 return min; 91 } 92 } 93 return val; 94 } 95 96 __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) 97 { 98 if (sat <= 31U) 99 { 100 const uint32_t max = ((1U << sat) - 1U); 101 if (val > (int32_t)max) 102 { 103 return max; 104 } 105 else if (val < 0) 106 { 107 return 0U; 108 } 109 } 110 return (uint32_t)val; 111 } 112 113 /** 114 \brief Rotate Right in unsigned value (32 bit) 115 \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. 116 \param [in] op1 Value to rotate 117 \param [in] op2 Number of Bits to rotate 118 \return Rotated value 119 */ 120 __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) 121 { 122 op2 %= 32U; 123 if (op2 == 0U) 124 { 125 return op1; 126 } 127 return (op1 >> op2) | (op1 << (32U - op2)); 128 } 129 130 131 #endif 132 133 /** 134 * @brief Clips Q63 to Q31 values. 135 */ 136 __STATIC_FORCEINLINE q31_t clip_q63_to_q31( 137 q63_t x) 138 { 139 return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? 140 ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; 141 } 142 143 /** 144 * @brief Clips Q63 to Q15 values. 145 */ 146 __STATIC_FORCEINLINE q15_t clip_q63_to_q15( 147 q63_t x) 148 { 149 return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? 150 ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15); 151 } 152 153 /** 154 * @brief Clips Q31 to Q7 values. 155 */ 156 __STATIC_FORCEINLINE q7_t clip_q31_to_q7( 157 q31_t x) 158 { 159 return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ? 160 ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x; 161 } 162 163 /** 164 * @brief Clips Q31 to Q15 values. 165 */ 166 __STATIC_FORCEINLINE q15_t clip_q31_to_q15( 167 q31_t x) 168 { 169 return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ? 170 ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x; 171 } 172 173 /** 174 * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format. 175 */ 176 __STATIC_FORCEINLINE q63_t mult32x64( 177 q63_t x, 178 q31_t y) 179 { 180 return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) + 181 (((q63_t) (x >> 32) * y) ) ); 182 } 183 184 /* SMMLAR */ 185 #define multAcc_32x32_keep32_R(a, x, y) \ 186 a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) 187 188 /* SMMLSR */ 189 #define multSub_32x32_keep32_R(a, x, y) \ 190 a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) 191 192 /* SMMULR */ 193 #define mult_32x32_keep32_R(a, x, y) \ 194 a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) 195 196 /* SMMLA */ 197 #define multAcc_32x32_keep32(a, x, y) \ 198 a += (q31_t) (((q63_t) x * y) >> 32) 199 200 /* SMMLS */ 201 #define multSub_32x32_keep32(a, x, y) \ 202 a -= (q31_t) (((q63_t) x * y) >> 32) 203 204 /* SMMUL */ 205 #define mult_32x32_keep32(a, x, y) \ 206 a = (q31_t) (((q63_t) x * y ) >> 32) 207 208 #ifndef ARM_MATH_DSP 209 /** 210 * @brief definition to pack two 16 bit values. 211 */ 212 #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ 213 (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) 214 #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ 215 (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) 216 #endif 217 218 /** 219 * @brief definition to pack four 8 bit values. 220 */ 221 #ifndef ARM_MATH_BIG_ENDIAN 222 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ 223 (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ 224 (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ 225 (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) 226 #else 227 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \ 228 (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \ 229 (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \ 230 (((int32_t)(v0) << 24) & (int32_t)0xFF000000) ) 231 #endif 232 233 234 235 236 /* 237 * @brief C custom defined intrinsic functions 238 */ 239 #if !defined (ARM_MATH_DSP) 240 241 242 /* 243 * @brief C custom defined QADD8 244 */ 245 __STATIC_FORCEINLINE uint32_t __QADD8( 246 uint32_t x, 247 uint32_t y) 248 { 249 q31_t r, s, t, u; 250 251 r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; 252 s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; 253 t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; 254 u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; 255 256 return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); 257 } 258 259 260 /* 261 * @brief C custom defined QSUB8 262 */ 263 __STATIC_FORCEINLINE uint32_t __QSUB8( 264 uint32_t x, 265 uint32_t y) 266 { 267 q31_t r, s, t, u; 268 269 r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; 270 s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; 271 t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; 272 u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; 273 274 return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); 275 } 276 277 278 /* 279 * @brief C custom defined QADD16 280 */ 281 __STATIC_FORCEINLINE uint32_t __QADD16( 282 uint32_t x, 283 uint32_t y) 284 { 285 /* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */ 286 q31_t r = 0, s = 0; 287 288 r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; 289 s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; 290 291 return ((uint32_t)((s << 16) | (r ))); 292 } 293 294 295 /* 296 * @brief C custom defined SHADD16 297 */ 298 __STATIC_FORCEINLINE uint32_t __SHADD16( 299 uint32_t x, 300 uint32_t y) 301 { 302 q31_t r, s; 303 304 r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; 305 s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; 306 307 return ((uint32_t)((s << 16) | (r ))); 308 } 309 310 311 /* 312 * @brief C custom defined QSUB16 313 */ 314 __STATIC_FORCEINLINE uint32_t __QSUB16( 315 uint32_t x, 316 uint32_t y) 317 { 318 q31_t r, s; 319 320 r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; 321 s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; 322 323 return ((uint32_t)((s << 16) | (r ))); 324 } 325 326 327 /* 328 * @brief C custom defined SHSUB16 329 */ 330 __STATIC_FORCEINLINE uint32_t __SHSUB16( 331 uint32_t x, 332 uint32_t y) 333 { 334 q31_t r, s; 335 336 r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; 337 s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; 338 339 return ((uint32_t)((s << 16) | (r ))); 340 } 341 342 343 /* 344 * @brief C custom defined QASX 345 */ 346 __STATIC_FORCEINLINE uint32_t __QASX( 347 uint32_t x, 348 uint32_t y) 349 { 350 q31_t r, s; 351 352 r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; 353 s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; 354 355 return ((uint32_t)((s << 16) | (r ))); 356 } 357 358 359 /* 360 * @brief C custom defined SHASX 361 */ 362 __STATIC_FORCEINLINE uint32_t __SHASX( 363 uint32_t x, 364 uint32_t y) 365 { 366 q31_t r, s; 367 368 r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; 369 s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; 370 371 return ((uint32_t)((s << 16) | (r ))); 372 } 373 374 375 /* 376 * @brief C custom defined QSAX 377 */ 378 __STATIC_FORCEINLINE uint32_t __QSAX( 379 uint32_t x, 380 uint32_t y) 381 { 382 q31_t r, s; 383 384 r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; 385 s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; 386 387 return ((uint32_t)((s << 16) | (r ))); 388 } 389 390 391 /* 392 * @brief C custom defined SHSAX 393 */ 394 __STATIC_FORCEINLINE uint32_t __SHSAX( 395 uint32_t x, 396 uint32_t y) 397 { 398 q31_t r, s; 399 400 r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; 401 s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; 402 403 return ((uint32_t)((s << 16) | (r ))); 404 } 405 406 407 /* 408 * @brief C custom defined SMUSDX 409 */ 410 __STATIC_FORCEINLINE uint32_t __SMUSDX( 411 uint32_t x, 412 uint32_t y) 413 { 414 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - 415 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); 416 } 417 418 /* 419 * @brief C custom defined SMUADX 420 */ 421 __STATIC_FORCEINLINE uint32_t __SMUADX( 422 uint32_t x, 423 uint32_t y) 424 { 425 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + 426 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); 427 } 428 429 430 /* 431 * @brief C custom defined QADD 432 */ 433 __STATIC_FORCEINLINE int32_t __QADD( 434 int32_t x, 435 int32_t y) 436 { 437 return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y))); 438 } 439 440 441 /* 442 * @brief C custom defined QSUB 443 */ 444 __STATIC_FORCEINLINE int32_t __QSUB( 445 int32_t x, 446 int32_t y) 447 { 448 return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y))); 449 } 450 451 452 /* 453 * @brief C custom defined SMLAD 454 */ 455 __STATIC_FORCEINLINE uint32_t __SMLAD( 456 uint32_t x, 457 uint32_t y, 458 uint32_t sum) 459 { 460 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + 461 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + 462 ( ((q31_t)sum ) ) )); 463 } 464 465 466 /* 467 * @brief C custom defined SMLADX 468 */ 469 __STATIC_FORCEINLINE uint32_t __SMLADX( 470 uint32_t x, 471 uint32_t y, 472 uint32_t sum) 473 { 474 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + 475 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + 476 ( ((q31_t)sum ) ) )); 477 } 478 479 480 /* 481 * @brief C custom defined SMLSDX 482 */ 483 __STATIC_FORCEINLINE uint32_t __SMLSDX( 484 uint32_t x, 485 uint32_t y, 486 uint32_t sum) 487 { 488 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - 489 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + 490 ( ((q31_t)sum ) ) )); 491 } 492 493 494 /* 495 * @brief C custom defined SMLALD 496 */ 497 __STATIC_FORCEINLINE uint64_t __SMLALD( 498 uint32_t x, 499 uint32_t y, 500 uint64_t sum) 501 { 502 /* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */ 503 return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + 504 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + 505 ( ((q63_t)sum ) ) )); 506 } 507 508 509 /* 510 * @brief C custom defined SMLALDX 511 */ 512 __STATIC_FORCEINLINE uint64_t __SMLALDX( 513 uint32_t x, 514 uint32_t y, 515 uint64_t sum) 516 { 517 /* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */ 518 return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + 519 ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + 520 ( ((q63_t)sum ) ) )); 521 } 522 523 524 /* 525 * @brief C custom defined SMUAD 526 */ 527 __STATIC_FORCEINLINE uint32_t __SMUAD( 528 uint32_t x, 529 uint32_t y) 530 { 531 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + 532 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); 533 } 534 535 536 /* 537 * @brief C custom defined SMUSD 538 */ 539 __STATIC_FORCEINLINE uint32_t __SMUSD( 540 uint32_t x, 541 uint32_t y) 542 { 543 return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) - 544 ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); 545 } 546 547 548 /* 549 * @brief C custom defined SXTB16 550 */ 551 __STATIC_FORCEINLINE uint32_t __SXTB16( 552 uint32_t x) 553 { 554 return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | 555 ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) )); 556 } 557 558 /* 559 * @brief C custom defined SMMLA 560 */ 561 __STATIC_FORCEINLINE int32_t __SMMLA( 562 int32_t x, 563 int32_t y, 564 int32_t sum) 565 { 566 return (sum + (int32_t) (((int64_t) x * y) >> 32)); 567 } 568 569 #endif /* !defined (ARM_MATH_DSP) */ 570 571 572 #ifdef __cplusplus 573 } 574 #endif 575 576 #endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */